zipimport.c revision 4925cde1cc20fe559b9c1429a99bf9b1c17f7048
1#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
5#include <time.h>
6
7
/* Bit flags stored in st_zip_searchorder.type. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* A candidate file suffix paired with its IS_* flags. */
struct st_zip_searchorder {
    char suffix[14];    /* "/__init__.pyc" plus its NUL needs 14 bytes */
    int type;           /* bitwise OR of IS_* flags */
};

/* Lookup order used when searching the Zip archive for a module:
   package __init__ entries come first, followed by the plain
   (non-package) .pyc, .pyo and .py entries. The table ends with an
   empty suffix. Per the original note, initzipimport() swaps the
   .pyc and .pyo entries in optimized mode and replaces '/' by SEP. */
static struct st_zip_searchorder zip_searchorder[] = {
    /* package forms */
    {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.py",  IS_PACKAGE | IS_SOURCE},
    /* plain module forms */
    {".pyc", IS_BYTECODE},
    {".pyo", IS_BYTECODE},
    {".py",  IS_SOURCE},
    /* sentinel */
    {"", 0}
};
31
32/* zipimporter object definition and support */
33
34typedef struct _zipimporter ZipImporter;
35
36struct _zipimporter {
37    PyObject_HEAD
38    PyObject *archive;  /* pathname of the Zip archive */
39    PyObject *prefix;   /* file prefix: "a/sub/directory/" */
40    PyObject *files;    /* dict with file info {path: toc_entry} */
41};
42
43static PyObject *ZipImportError;
44static PyObject *zip_directory_cache = NULL;
45
46/* forward decls */
47static PyObject *read_directory(char *archive);
48static PyObject *get_data(char *archive, PyObject *toc_entry);
49static PyObject *get_module_code(ZipImporter *self, char *fullname,
50                                 int *p_ispackage, char **p_modpath);
51
52
53#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
54
55
56/* zipimporter.__init__
57   Split the "subdirectory" from the Zip archive path, lookup a matching
58   entry in sys.path_importer_cache, fetch the file directory from there
59   if found, or else read it from the archive. */
60static int
61zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
62{
63    char *path, *p, *prefix, buf[MAXPATHLEN+2];
64    size_t len;
65
66    if (!_PyArg_NoKeywords("zipimporter()", kwds))
67        return -1;
68
69    if (!PyArg_ParseTuple(args, "s:zipimporter", &path))
70        return -1;
71
72    len = strlen(path);
73    if (len == 0) {
74        PyErr_SetString(ZipImportError, "archive path is empty");
75        return -1;
76    }
77    if (len >= MAXPATHLEN) {
78        PyErr_SetString(ZipImportError,
79                        "archive path too long");
80        return -1;
81    }
82    strcpy(buf, path);
83
84#ifdef ALTSEP
85    for (p = buf; *p; p++) {
86        if (*p == ALTSEP)
87            *p = SEP;
88    }
89#endif
90
91    path = NULL;
92    prefix = NULL;
93    for (;;) {
94        struct stat statbuf;
95        int rv;
96
97        rv = stat(buf, &statbuf);
98        if (rv == 0) {
99            /* it exists */
100            if (S_ISREG(statbuf.st_mode))
101                /* it's a file */
102                path = buf;
103            break;
104        }
105        /* back up one path element */
106        p = strrchr(buf, SEP);
107        if (prefix != NULL)
108            *prefix = SEP;
109        if (p == NULL)
110            break;
111        *p = '\0';
112        prefix = p;
113    }
114    if (path != NULL) {
115        PyObject *files;
116        files = PyDict_GetItemString(zip_directory_cache, path);
117        if (files == NULL) {
118            files = read_directory(buf);
119            if (files == NULL)
120                return -1;
121            if (PyDict_SetItemString(zip_directory_cache, path,
122                                     files) != 0)
123                return -1;
124        }
125        else
126            Py_INCREF(files);
127        self->files = files;
128    }
129    else {
130        PyErr_SetString(ZipImportError, "not a Zip file");
131        return -1;
132    }
133
134    if (prefix == NULL)
135        prefix = "";
136    else {
137        prefix++;
138        len = strlen(prefix);
139        if (prefix[len-1] != SEP) {
140            /* add trailing SEP */
141            prefix[len] = SEP;
142            prefix[len + 1] = '\0';
143        }
144    }
145
146    self->archive = PyUnicode_FromString(buf);
147    if (self->archive == NULL)
148        return -1;
149
150    self->prefix = PyUnicode_FromString(prefix);
151    if (self->prefix == NULL)
152        return -1;
153
154    return 0;
155}
156
157/* GC support. */
158static int
159zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
160{
161    ZipImporter *self = (ZipImporter *)obj;
162    Py_VISIT(self->files);
163    return 0;
164}
165
166static void
167zipimporter_dealloc(ZipImporter *self)
168{
169    PyObject_GC_UnTrack(self);
170    Py_XDECREF(self->archive);
171    Py_XDECREF(self->prefix);
172    Py_XDECREF(self->files);
173    Py_TYPE(self)->tp_free((PyObject *)self);
174}
175
176static PyObject *
177zipimporter_repr(ZipImporter *self)
178{
179    char *archive = "???";
180    char *prefix = "";
181
182    if (self->archive != NULL && PyUnicode_Check(self->archive))
183        archive = _PyUnicode_AsString(self->archive);
184    if (self->prefix != NULL && PyUnicode_Check(self->prefix))
185        prefix = _PyUnicode_AsString(self->prefix);
186    if (prefix != NULL && *prefix)
187        return PyUnicode_FromFormat("<zipimporter object \"%.300s%c%.150s\">",
188                                    archive, SEP, prefix);
189    else
190        return PyUnicode_FromFormat("<zipimporter object \"%.300s\">",
191                                    archive);
192}
193
/* Equivalent of fullname.split(".")[-1]: return a pointer to the part
   of 'fullname' after its last '.', or 'fullname' itself when it
   contains no dot. The result points into the caller's buffer. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return last_dot != NULL ? last_dot + 1 : fullname;
}
205
206/* Given a (sub)modulename, write the potential file path in the
207   archive (without extension) to the path buffer. Return the
208   length of the resulting string. */
209static int
210make_filename(char *prefix, char *name, char *path)
211{
212    size_t len;
213    char *p;
214
215    len = strlen(prefix);
216
217    /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
218    if (len + strlen(name) + 13 >= MAXPATHLEN) {
219        PyErr_SetString(ZipImportError, "path too long");
220        return -1;
221    }
222
223    strcpy(path, prefix);
224    strcpy(path + len, name);
225    for (p = path + len; *p; p++) {
226        if (*p == '.')
227            *p = SEP;
228    }
229    len += strlen(name);
230    assert(len < INT_MAX);
231    return (int)len;
232}
233
/* Result codes of get_module_info(). */
enum zi_module_info {
    MI_ERROR = 0,       /* lookup failed */
    MI_NOT_FOUND = 1,   /* no matching entry in the archive */
    MI_MODULE = 2,      /* found as a plain module */
    MI_PACKAGE = 3      /* found as a package */
};
240
241/* Return some information about a module. */
242static enum zi_module_info
243get_module_info(ZipImporter *self, char *fullname)
244{
245    char *subname, path[MAXPATHLEN + 1];
246    int len;
247    struct st_zip_searchorder *zso;
248
249    subname = get_subname(fullname);
250
251    len = make_filename(_PyUnicode_AsString(self->prefix), subname, path);
252    if (len < 0)
253        return MI_ERROR;
254
255    for (zso = zip_searchorder; *zso->suffix; zso++) {
256        strcpy(path + len, zso->suffix);
257        if (PyDict_GetItemString(self->files, path) != NULL) {
258            if (zso->type & IS_PACKAGE)
259                return MI_PACKAGE;
260            else
261                return MI_MODULE;
262        }
263    }
264    return MI_NOT_FOUND;
265}
266
267/* Check whether we can satisfy the import of the module named by
268   'fullname'. Return self if we can, None if we can't. */
269static PyObject *
270zipimporter_find_module(PyObject *obj, PyObject *args)
271{
272    ZipImporter *self = (ZipImporter *)obj;
273    PyObject *path = NULL;
274    char *fullname;
275    enum zi_module_info mi;
276
277    if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
278                          &fullname, &path))
279        return NULL;
280
281    mi = get_module_info(self, fullname);
282    if (mi == MI_ERROR)
283        return NULL;
284    if (mi == MI_NOT_FOUND) {
285        Py_INCREF(Py_None);
286        return Py_None;
287    }
288    Py_INCREF(self);
289    return (PyObject *)self;
290}
291
292/* Load and return the module named by 'fullname'. */
293static PyObject *
294zipimporter_load_module(PyObject *obj, PyObject *args)
295{
296    ZipImporter *self = (ZipImporter *)obj;
297    PyObject *code, *mod, *dict;
298    char *fullname, *modpath;
299    int ispackage;
300
301    if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
302                          &fullname))
303        return NULL;
304
305    code = get_module_code(self, fullname, &ispackage, &modpath);
306    if (code == NULL)
307        return NULL;
308
309    mod = PyImport_AddModule(fullname);
310    if (mod == NULL) {
311        Py_DECREF(code);
312        return NULL;
313    }
314    dict = PyModule_GetDict(mod);
315
316    /* mod.__loader__ = self */
317    if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
318        goto error;
319
320    if (ispackage) {
321        /* add __path__ to the module *before* the code gets
322           executed */
323        PyObject *pkgpath, *fullpath;
324        char *subname = get_subname(fullname);
325        int err;
326
327        fullpath = PyUnicode_FromFormat("%U%c%U%s",
328                                self->archive, SEP,
329                                self->prefix, subname);
330        if (fullpath == NULL)
331            goto error;
332
333        pkgpath = Py_BuildValue("[O]", fullpath);
334        Py_DECREF(fullpath);
335        if (pkgpath == NULL)
336            goto error;
337        err = PyDict_SetItemString(dict, "__path__", pkgpath);
338        Py_DECREF(pkgpath);
339        if (err != 0)
340            goto error;
341    }
342    mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
343    Py_DECREF(code);
344    if (Py_VerboseFlag)
345        PySys_WriteStderr("import %s # loaded from Zip %s\n",
346                          fullname, modpath);
347    return mod;
348error:
349    Py_DECREF(code);
350    Py_DECREF(mod);
351    return NULL;
352}
353
354/* Return a string matching __file__ for the named module */
355static PyObject *
356zipimporter_get_filename(PyObject *obj, PyObject *args)
357{
358    ZipImporter *self = (ZipImporter *)obj;
359    PyObject *code;
360    char *fullname, *modpath;
361    int ispackage;
362
363    if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename",
364                         &fullname))
365    return NULL;
366
367    /* Deciding the filename requires working out where the code
368       would come from if the module was actually loaded */
369    code = get_module_code(self, fullname, &ispackage, &modpath);
370    if (code == NULL)
371    return NULL;
372    Py_DECREF(code); /* Only need the path info */
373
374    return PyUnicode_FromString(modpath);
375}
376
377/* Return a bool signifying whether the module is a package or not. */
378static PyObject *
379zipimporter_is_package(PyObject *obj, PyObject *args)
380{
381    ZipImporter *self = (ZipImporter *)obj;
382    char *fullname;
383    enum zi_module_info mi;
384
385    if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
386                          &fullname))
387        return NULL;
388
389    mi = get_module_info(self, fullname);
390    if (mi == MI_ERROR)
391        return NULL;
392    if (mi == MI_NOT_FOUND) {
393        PyErr_Format(ZipImportError, "can't find module '%.200s'",
394                     fullname);
395        return NULL;
396    }
397    return PyBool_FromLong(mi == MI_PACKAGE);
398}
399
400static PyObject *
401zipimporter_get_data(PyObject *obj, PyObject *args)
402{
403    ZipImporter *self = (ZipImporter *)obj;
404    char *path;
405#ifdef ALTSEP
406    char *p, buf[MAXPATHLEN + 1];
407#endif
408    PyObject *toc_entry;
409    Py_ssize_t len;
410    char *archive_str;
411
412    if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
413        return NULL;
414
415#ifdef ALTSEP
416    if (strlen(path) >= MAXPATHLEN) {
417        PyErr_SetString(ZipImportError, "path too long");
418        return NULL;
419    }
420    strcpy(buf, path);
421    for (p = buf; *p; p++) {
422        if (*p == ALTSEP)
423            *p = SEP;
424    }
425    path = buf;
426#endif
427    archive_str = _PyUnicode_AsStringAndSize(self->archive, &len);
428    if ((size_t)len < strlen(path) &&
429        strncmp(path, archive_str, len) == 0 &&
430        path[len] == SEP) {
431        path = path + len + 1;
432    }
433
434    toc_entry = PyDict_GetItemString(self->files, path);
435    if (toc_entry == NULL) {
436        PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
437        return NULL;
438    }
439    return get_data(archive_str, toc_entry);
440}
441
442static PyObject *
443zipimporter_get_code(PyObject *obj, PyObject *args)
444{
445    ZipImporter *self = (ZipImporter *)obj;
446    char *fullname;
447
448    if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
449        return NULL;
450
451    return get_module_code(self, fullname, NULL, NULL);
452}
453
454static PyObject *
455zipimporter_get_source(PyObject *obj, PyObject *args)
456{
457    ZipImporter *self = (ZipImporter *)obj;
458    PyObject *toc_entry;
459    char *fullname, *subname, path[MAXPATHLEN+1];
460    int len;
461    enum zi_module_info mi;
462
463    if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
464        return NULL;
465
466    mi = get_module_info(self, fullname);
467    if (mi == MI_ERROR)
468        return NULL;
469    if (mi == MI_NOT_FOUND) {
470        PyErr_Format(ZipImportError, "can't find module '%.200s'",
471                     fullname);
472        return NULL;
473    }
474    subname = get_subname(fullname);
475
476    len = make_filename(_PyUnicode_AsString(self->prefix), subname, path);
477    if (len < 0)
478        return NULL;
479
480    if (mi == MI_PACKAGE) {
481        path[len] = SEP;
482        strcpy(path + len + 1, "__init__.py");
483    }
484    else
485        strcpy(path + len, ".py");
486
487    toc_entry = PyDict_GetItemString(self->files, path);
488    if (toc_entry != NULL) {
489        PyObject *bytes = get_data(_PyUnicode_AsString(self->archive), toc_entry);
490        PyObject *res = PyUnicode_FromString(PyBytes_AsString(bytes));
491        Py_XDECREF(bytes);
492        return res;
493    }
494
495    /* we have the module, but no source */
496    Py_INCREF(Py_None);
497    return Py_None;
498}
499
500PyDoc_STRVAR(doc_find_module,
501"find_module(fullname, path=None) -> self or None.\n\
502\n\
503Search for a module specified by 'fullname'. 'fullname' must be the\n\
504fully qualified (dotted) module name. It returns the zipimporter\n\
505instance itself if the module was found, or None if it wasn't.\n\
506The optional 'path' argument is ignored -- it's there for compatibility\n\
507with the importer protocol.");
508
509PyDoc_STRVAR(doc_load_module,
510"load_module(fullname) -> module.\n\
511\n\
512Load the module specified by 'fullname'. 'fullname' must be the\n\
513fully qualified (dotted) module name. It returns the imported\n\
514module, or raises ZipImportError if it wasn't found.");
515
516PyDoc_STRVAR(doc_get_data,
517"get_data(pathname) -> string with file data.\n\
518\n\
519Return the data associated with 'pathname'. Raise IOError if\n\
520the file wasn't found.");
521
522PyDoc_STRVAR(doc_is_package,
523"is_package(fullname) -> bool.\n\
524\n\
525Return True if the module specified by fullname is a package.\n\
526Raise ZipImportError if the module couldn't be found.");
527
528PyDoc_STRVAR(doc_get_code,
529"get_code(fullname) -> code object.\n\
530\n\
531Return the code object for the specified module. Raise ZipImportError\n\
532if the module couldn't be found.");
533
534PyDoc_STRVAR(doc_get_source,
535"get_source(fullname) -> source string.\n\
536\n\
537Return the source code for the specified module. Raise ZipImportError\n\
538if the module couldn't be found, return None if the archive does\n\
539contain the module, but has no source for it.");
540
541
542PyDoc_STRVAR(doc_get_filename,
543"get_filename(fullname) -> filename string.\n\
544\n\
545Return the filename for the specified module.");
546
547static PyMethodDef zipimporter_methods[] = {
548    {"find_module", zipimporter_find_module, METH_VARARGS,
549     doc_find_module},
550    {"load_module", zipimporter_load_module, METH_VARARGS,
551     doc_load_module},
552    {"get_data", zipimporter_get_data, METH_VARARGS,
553     doc_get_data},
554    {"get_code", zipimporter_get_code, METH_VARARGS,
555     doc_get_code},
556    {"get_source", zipimporter_get_source, METH_VARARGS,
557     doc_get_source},
558    {"get_filename", zipimporter_get_filename, METH_VARARGS,
559     doc_get_filename},
560    {"is_package", zipimporter_is_package, METH_VARARGS,
561     doc_is_package},
562    {NULL,              NULL}   /* sentinel */
563};
564
565static PyMemberDef zipimporter_members[] = {
566    {"archive",  T_OBJECT, offsetof(ZipImporter, archive),  READONLY},
567    {"prefix",   T_OBJECT, offsetof(ZipImporter, prefix),   READONLY},
568    {"_files",   T_OBJECT, offsetof(ZipImporter, files),    READONLY},
569    {NULL}
570};
571
572PyDoc_STRVAR(zipimporter_doc,
573"zipimporter(archivepath) -> zipimporter object\n\
574\n\
575Create a new zipimporter instance. 'archivepath' must be a path to\n\
576a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
577'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
578valid directory inside the archive.\n\
579\n\
580'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
581archive.\n\
582\n\
583The 'archive' attribute of zipimporter objects contains the name of the\n\
584zipfile targeted.");
585
586#define DEFERRED_ADDRESS(ADDR) 0
587
588static PyTypeObject ZipImporter_Type = {
589    PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
590    "zipimport.zipimporter",
591    sizeof(ZipImporter),
592    0,                                          /* tp_itemsize */
593    (destructor)zipimporter_dealloc,            /* tp_dealloc */
594    0,                                          /* tp_print */
595    0,                                          /* tp_getattr */
596    0,                                          /* tp_setattr */
597    0,                                          /* tp_reserved */
598    (reprfunc)zipimporter_repr,                 /* tp_repr */
599    0,                                          /* tp_as_number */
600    0,                                          /* tp_as_sequence */
601    0,                                          /* tp_as_mapping */
602    0,                                          /* tp_hash */
603    0,                                          /* tp_call */
604    0,                                          /* tp_str */
605    PyObject_GenericGetAttr,                    /* tp_getattro */
606    0,                                          /* tp_setattro */
607    0,                                          /* tp_as_buffer */
608    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
609        Py_TPFLAGS_HAVE_GC,                     /* tp_flags */
610    zipimporter_doc,                            /* tp_doc */
611    zipimporter_traverse,                       /* tp_traverse */
612    0,                                          /* tp_clear */
613    0,                                          /* tp_richcompare */
614    0,                                          /* tp_weaklistoffset */
615    0,                                          /* tp_iter */
616    0,                                          /* tp_iternext */
617    zipimporter_methods,                        /* tp_methods */
618    zipimporter_members,                        /* tp_members */
619    0,                                          /* tp_getset */
620    0,                                          /* tp_base */
621    0,                                          /* tp_dict */
622    0,                                          /* tp_descr_get */
623    0,                                          /* tp_descr_set */
624    0,                                          /* tp_dictoffset */
625    (initproc)zipimporter_init,                 /* tp_init */
626    PyType_GenericAlloc,                        /* tp_alloc */
627    PyType_GenericNew,                          /* tp_new */
628    PyObject_GC_Del,                            /* tp_free */
629};
630
631
632/* implementation */
633
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian. This partially reimplements
   marshal.c:r_long().

   The four bytes are interpreted as a signed 32-bit two's complement
   value, so the result lies in [-2**31, 2**31 - 1] whatever the width
   of long is. The bytes are assembled in an unsigned type so that no
   bit is ever shifted into a sign bit. */
static long
get_long(unsigned char *buf) {
    unsigned long u;

    u =  (unsigned long)buf[0];
    u |= (unsigned long)buf[1] <<  8;
    u |= (unsigned long)buf[2] << 16;
    u |= (unsigned long)buf[3] << 24;
    if (u & 0x80000000UL) {
        /* negative value: u - 2**32, computed without overflow as
           -(2**32 - 1 - u) - 1; the operand of the cast is at most
           0x7FFFFFFF and therefore fits in any long */
        return -(long)(0xFFFFFFFFUL - u) - 1;
    }
    return (long)u;
}
650
651/*
652   read_directory(archive) -> files dict (new reference)
653
654   Given a path to a Zip archive, build a dict, mapping file names
655   (local to the archive, using SEP as a separator) to toc entries.
656
657   A toc_entry is a tuple:
658
659       (__file__,      # value to use for __file__, available for all files
660    compress,      # compression kind; 0 for uncompressed
661    data_size,     # size of compressed data on disk
662    file_size,     # size of decompressed data
663    file_offset,   # offset of file header from start of archive
664    time,          # mod time of file (in dos format)
665    date,          # mod data of file (in dos format)
666    crc,           # crc checksum of the data
667       )
668
669   Directories can be recognized by the trailing SEP in the name,
670   data_size and file_offset are 0.
671*/
672static PyObject *
673read_directory(char *archive)
674{
675    PyObject *files = NULL;
676    FILE *fp;
677    long compress, crc, data_size, file_size, file_offset, date, time;
678    long header_offset, name_size, header_size, header_position;
679    long i, l, count;
680    size_t length;
681    char path[MAXPATHLEN + 5];
682    char name[MAXPATHLEN + 5];
683    char *p, endof_central_dir[22];
684    long arc_offset; /* offset from beginning of file to start of zip-archive */
685
686    if (strlen(archive) > MAXPATHLEN) {
687        PyErr_SetString(PyExc_OverflowError,
688                        "Zip path name is too long");
689        return NULL;
690    }
691    strcpy(path, archive);
692
693    fp = fopen(archive, "rb");
694    if (fp == NULL) {
695        PyErr_Format(ZipImportError, "can't open Zip file: "
696                     "'%.200s'", archive);
697        return NULL;
698    }
699    fseek(fp, -22, SEEK_END);
700    header_position = ftell(fp);
701    if (fread(endof_central_dir, 1, 22, fp) != 22) {
702        fclose(fp);
703        PyErr_Format(ZipImportError, "can't read Zip file: "
704                     "'%.200s'", archive);
705        return NULL;
706    }
707    if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
708        /* Bad: End of Central Dir signature */
709        fclose(fp);
710        PyErr_Format(ZipImportError, "not a Zip file: "
711                     "'%.200s'", archive);
712        return NULL;
713    }
714
715    header_size = get_long((unsigned char *)endof_central_dir + 12);
716    header_offset = get_long((unsigned char *)endof_central_dir + 16);
717    arc_offset = header_position - header_offset - header_size;
718    header_offset += arc_offset;
719
720    files = PyDict_New();
721    if (files == NULL)
722        goto error;
723
724    length = (long)strlen(path);
725    path[length] = SEP;
726
727    /* Start of Central Directory */
728    count = 0;
729    for (;;) {
730        PyObject *t;
731        int err;
732
733        fseek(fp, header_offset, 0);  /* Start of file header */
734        l = PyMarshal_ReadLongFromFile(fp);
735        if (l != 0x02014B50)
736            break;              /* Bad: Central Dir File Header */
737        fseek(fp, header_offset + 10, 0);
738        compress = PyMarshal_ReadShortFromFile(fp);
739        time = PyMarshal_ReadShortFromFile(fp);
740        date = PyMarshal_ReadShortFromFile(fp);
741        crc = PyMarshal_ReadLongFromFile(fp);
742        data_size = PyMarshal_ReadLongFromFile(fp);
743        file_size = PyMarshal_ReadLongFromFile(fp);
744        name_size = PyMarshal_ReadShortFromFile(fp);
745        header_size = 46 + name_size +
746           PyMarshal_ReadShortFromFile(fp) +
747           PyMarshal_ReadShortFromFile(fp);
748        fseek(fp, header_offset + 42, 0);
749        file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
750        if (name_size > MAXPATHLEN)
751            name_size = MAXPATHLEN;
752
753        p = name;
754        for (i = 0; i < name_size; i++) {
755            *p = (char)getc(fp);
756            if (*p == '/')
757                *p = SEP;
758            p++;
759        }
760        *p = 0;         /* Add terminating null byte */
761        header_offset += header_size;
762
763        strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
764
765        t = Py_BuildValue("siiiiiii", path, compress, data_size,
766                          file_size, file_offset, time, date, crc);
767        if (t == NULL)
768            goto error;
769        err = PyDict_SetItemString(files, name, t);
770        Py_DECREF(t);
771        if (err != 0)
772            goto error;
773        count++;
774    }
775    fclose(fp);
776    if (Py_VerboseFlag)
777        PySys_WriteStderr("# zipimport: found %ld names in %s\n",
778            count, archive);
779    return files;
780error:
781    fclose(fp);
782    Py_XDECREF(files);
783    return NULL;
784}
785
786/* Return the zlib.decompress function object, or NULL if zlib couldn't
787   be imported. The function is cached when found, so subsequent calls
788   don't import zlib again. */
789static PyObject *
790get_decompress_func(void)
791{
792    static int importing_zlib = 0;
793    PyObject *zlib;
794    PyObject *decompress;
795
796    if (importing_zlib != 0)
797        /* Someone has a zlib.py[co] in their Zip file;
798           let's avoid a stack overflow. */
799        return NULL;
800    importing_zlib = 1;
801    zlib = PyImport_ImportModuleNoBlock("zlib");
802    importing_zlib = 0;
803    if (zlib != NULL) {
804        decompress = PyObject_GetAttrString(zlib,
805                                            "decompress");
806        Py_DECREF(zlib);
807    }
808    else {
809        PyErr_Clear();
810        decompress = NULL;
811    }
812    if (Py_VerboseFlag)
813        PySys_WriteStderr("# zipimport: zlib %s\n",
814            zlib != NULL ? "available": "UNAVAILABLE");
815    return decompress;
816}
817
818/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
819   data as a new reference. */
820static PyObject *
821get_data(char *archive, PyObject *toc_entry)
822{
823    PyObject *raw_data, *data = NULL, *decompress;
824    char *buf;
825    FILE *fp;
826    int err;
827    Py_ssize_t bytes_read = 0;
828    long l;
829    char *datapath;
830    long compress, data_size, file_size, file_offset, bytes_size;
831    long time, date, crc;
832
833    if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
834                          &data_size, &file_size, &file_offset, &time,
835                          &date, &crc)) {
836        return NULL;
837    }
838
839    fp = fopen(archive, "rb");
840    if (!fp) {
841        PyErr_Format(PyExc_IOError,
842           "zipimport: can not open file %s", archive);
843        return NULL;
844    }
845
846    /* Check to make sure the local file header is correct */
847    fseek(fp, file_offset, 0);
848    l = PyMarshal_ReadLongFromFile(fp);
849    if (l != 0x04034B50) {
850        /* Bad: Local File Header */
851        PyErr_Format(ZipImportError,
852                     "bad local file header in %s",
853                     archive);
854        fclose(fp);
855        return NULL;
856    }
857    fseek(fp, file_offset + 26, 0);
858    l = 30 + PyMarshal_ReadShortFromFile(fp) +
859        PyMarshal_ReadShortFromFile(fp);        /* local header size */
860    file_offset += l;           /* Start of file data */
861
862    bytes_size = compress == 0 ? data_size : data_size + 1;
863    if (bytes_size == 0)
864        bytes_size++;
865    raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
866
867    if (raw_data == NULL) {
868        fclose(fp);
869        return NULL;
870    }
871    buf = PyBytes_AsString(raw_data);
872
873    err = fseek(fp, file_offset, 0);
874    if (err == 0)
875        bytes_read = fread(buf, 1, data_size, fp);
876    fclose(fp);
877    if (err || bytes_read != data_size) {
878        PyErr_SetString(PyExc_IOError,
879                        "zipimport: can't read data");
880        Py_DECREF(raw_data);
881        return NULL;
882    }
883
884    if (compress != 0) {
885        buf[data_size] = 'Z';  /* saw this in zipfile.py */
886        data_size++;
887    }
888    buf[data_size] = '\0';
889
890    if (compress == 0) {  /* data is not compressed */
891        data = PyBytes_FromStringAndSize(buf, data_size);
892        Py_DECREF(raw_data);
893        return data;
894    }
895
896    /* Decompress with zlib */
897    decompress = get_decompress_func();
898    if (decompress == NULL) {
899        PyErr_SetString(ZipImportError,
900                        "can't decompress data; "
901                        "zlib not available");
902        goto error;
903    }
904    data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
905    Py_DECREF(decompress);
906error:
907    Py_DECREF(raw_data);
908    return data;
909}
910
/* Lenient comparison of two modification times: returns 1 when they
   differ by at most one second, 0 otherwise. The tolerance exists
   because the mtime kept in the archive is less precise than the one
   stored in a .pyc (dostime only stores even seconds). */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    return (delta < 0 ? -delta : delta) <= 1;
}
923
924/* Given the contents of a .py[co] file in a buffer, unmarshal the data
925   and return the code object. Return None if it the magic word doesn't
926   match (we do this instead of raising an exception as we fall back
927   to .py if available and we don't want to mask other errors).
928   Returns a new reference. */
929static PyObject *
930unmarshal_code(char *pathname, PyObject *data, time_t mtime)
931{
932    PyObject *code;
933    char *buf = PyBytes_AsString(data);
934    Py_ssize_t size = PyBytes_Size(data);
935
936    if (size <= 9) {
937        PyErr_SetString(ZipImportError,
938                        "bad pyc data");
939        return NULL;
940    }
941
942    if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
943        if (Py_VerboseFlag)
944            PySys_WriteStderr("# %s has bad magic\n",
945                              pathname);
946        Py_INCREF(Py_None);
947        return Py_None;  /* signal caller to try alternative */
948    }
949
950    if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
951                                mtime)) {
952        if (Py_VerboseFlag)
953            PySys_WriteStderr("# %s has bad mtime\n",
954                              pathname);
955        Py_INCREF(Py_None);
956        return Py_None;  /* signal caller to try alternative */
957    }
958
959    code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
960    if (code == NULL)
961        return NULL;
962    if (!PyCode_Check(code)) {
963        Py_DECREF(code);
964        PyErr_Format(PyExc_TypeError,
965             "compiled module %.200s is not a code object",
966             pathname);
967        return NULL;
968    }
969    return code;
970}
971
972/* Replace any occurances of "\r\n?" in the input string with "\n".
973   This converts DOS and Mac line endings to Unix line endings.
974   Also append a trailing "\n" to be compatible with
975   PyParser_SimpleParseFile(). Returns a new reference. */
976static PyObject *
977normalize_line_endings(PyObject *source)
978{
979    char *buf, *q, *p = PyBytes_AsString(source);
980    PyObject *fixed_source;
981    int len = 0;
982
983    if (!p) {
984        return PyBytes_FromStringAndSize("\n\0", 2);
985    }
986
987    /* one char extra for trailing \n and one for terminating \0 */
988    buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
989    if (buf == NULL) {
990        PyErr_SetString(PyExc_MemoryError,
991                        "zipimport: no memory to allocate "
992                        "source buffer");
993        return NULL;
994    }
995    /* replace "\r\n?" by "\n" */
996    for (q = buf; *p != '\0'; p++) {
997        if (*p == '\r') {
998            *q++ = '\n';
999            if (*(p + 1) == '\n')
1000                p++;
1001        }
1002        else
1003            *q++ = *p;
1004        len++;
1005    }
1006    *q++ = '\n';  /* add trailing \n */
1007    *q = '\0';
1008    fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
1009    PyMem_Free(buf);
1010    return fixed_source;
1011}
1012
1013/* Given a string buffer containing Python source code, compile it
1014   return and return a code object as a new reference. */
1015static PyObject *
1016compile_source(char *pathname, PyObject *source)
1017{
1018    PyObject *code, *fixed_source;
1019
1020    fixed_source = normalize_line_endings(source);
1021    if (fixed_source == NULL)
1022        return NULL;
1023
1024    code = Py_CompileString(PyBytes_AsString(fixed_source), pathname,
1025                            Py_file_input);
1026    Py_DECREF(fixed_source);
1027    return code;
1028}
1029
/* Convert the packed date/time values found in the Zip archive to a
   time_t that is compatible with the time stamp stored in .pyc files.

   'dostime' holds seconds/2 in bits 0-4, minutes in bits 5-10 and
   hours in bits 11-15.  'dosdate' holds the day of the month in bits
   0-4, the month (1-12) in bits 5-8 and the year counted from 1980
   in bits 9-15.  The broken-down time is converted with mktime(). */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm fields;
    int two_second_units = dostime & 0x1f;
    int minutes = (dostime >> 5) & 0x3f;
    int hours = (dostime >> 11) & 0x1f;
    int day = dosdate & 0x1f;
    int month = (dosdate >> 5) & 0x0f;
    int years_since_1980 = (dosdate >> 9) & 0x7f;

    memset((void *) &fields, '\0', sizeof(fields));

    fields.tm_year  = years_since_1980 + 80;  /* tm_year counts from 1900 */
    fields.tm_mon   = month - 1;              /* tm_mon is zero-based */
    fields.tm_mday  = day;
    fields.tm_hour  = hours;
    fields.tm_min   = minutes;
    fields.tm_sec   = two_second_units * 2;
    fields.tm_isdst = -1;  /* let mktime() decide; wday/yday is ignored */

    return mktime(&fields);
}
1049
1050/* Given a path to a .pyc or .pyo file in the archive, return the
1051   modification time of the matching .py file, or 0 if no source
1052   is available. */
1053static time_t
1054get_mtime_of_source(ZipImporter *self, char *path)
1055{
1056    PyObject *toc_entry;
1057    time_t mtime = 0;
1058    Py_ssize_t lastchar = strlen(path) - 1;
1059    char savechar = path[lastchar];
1060    path[lastchar] = '\0';  /* strip 'c' or 'o' from *.py[co] */
1061    toc_entry = PyDict_GetItemString(self->files, path);
1062    if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1063        PyTuple_Size(toc_entry) == 8) {
1064        /* fetch the time stamp of the .py file for comparison
1065           with an embedded pyc time stamp */
1066        int time, date;
1067        time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5));
1068        date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6));
1069        mtime = parse_dostime(time, date);
1070    }
1071    path[lastchar] = savechar;
1072    return mtime;
1073}
1074
1075/* Return the code object for the module named by 'fullname' from the
1076   Zip archive as a new reference. */
1077static PyObject *
1078get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1079                   time_t mtime, PyObject *toc_entry)
1080{
1081    PyObject *data, *code;
1082    char *modpath;
1083    char *archive = _PyUnicode_AsString(self->archive);
1084
1085    if (archive == NULL)
1086        return NULL;
1087
1088    data = get_data(archive, toc_entry);
1089    if (data == NULL)
1090        return NULL;
1091
1092    modpath = _PyUnicode_AsString(PyTuple_GetItem(toc_entry, 0));
1093
1094    if (isbytecode) {
1095        code = unmarshal_code(modpath, data, mtime);
1096    }
1097    else {
1098        code = compile_source(modpath, data);
1099    }
1100    Py_DECREF(data);
1101    return code;
1102}
1103
1104/* Get the code object associated with the module specified by
1105   'fullname'. */
1106static PyObject *
1107get_module_code(ZipImporter *self, char *fullname,
1108                int *p_ispackage, char **p_modpath)
1109{
1110    PyObject *toc_entry;
1111    char *subname, path[MAXPATHLEN + 1];
1112    int len;
1113    struct st_zip_searchorder *zso;
1114
1115    subname = get_subname(fullname);
1116
1117    len = make_filename(_PyUnicode_AsString(self->prefix), subname, path);
1118    if (len < 0)
1119        return NULL;
1120
1121    for (zso = zip_searchorder; *zso->suffix; zso++) {
1122        PyObject *code = NULL;
1123
1124        strcpy(path + len, zso->suffix);
1125        if (Py_VerboseFlag > 1)
1126            PySys_WriteStderr("# trying %s%c%s\n",
1127                              _PyUnicode_AsString(self->archive),
1128                              (int)SEP, path);
1129        toc_entry = PyDict_GetItemString(self->files, path);
1130        if (toc_entry != NULL) {
1131            time_t mtime = 0;
1132            int ispackage = zso->type & IS_PACKAGE;
1133            int isbytecode = zso->type & IS_BYTECODE;
1134
1135            if (isbytecode)
1136                mtime = get_mtime_of_source(self, path);
1137            if (p_ispackage != NULL)
1138                *p_ispackage = ispackage;
1139            code = get_code_from_data(self, ispackage,
1140                                      isbytecode, mtime,
1141                                      toc_entry);
1142            if (code == Py_None) {
1143                /* bad magic number or non-matching mtime
1144                   in byte code, try next */
1145                Py_DECREF(code);
1146                continue;
1147            }
1148            if (code != NULL && p_modpath != NULL)
1149                *p_modpath = _PyUnicode_AsString(
1150                    PyTuple_GetItem(toc_entry, 0));
1151            return code;
1152        }
1153    }
1154    PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1155    return NULL;
1156}
1157
1158
1159/* Module init */
1160
1161PyDoc_STRVAR(zipimport_doc,
1162"zipimport provides support for importing Python modules from Zip archives.\n\
1163\n\
1164This module exports three objects:\n\
1165- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1166- ZipImportError: exception raised by zipimporter objects. It's a\n\
1167  subclass of ImportError, so it can be caught as ImportError, too.\n\
1168- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1169  info dicts, as used in zipimporter._files.\n\
1170\n\
1171It is usually not needed to use the zipimport module explicitly; it is\n\
1172used by the builtin import mechanism for sys.path items that are paths\n\
1173to Zip archives.");
1174
1175static struct PyModuleDef zipimportmodule = {
1176    PyModuleDef_HEAD_INIT,
1177    "zipimport",
1178    zipimport_doc,
1179    -1,
1180    NULL,
1181    NULL,
1182    NULL,
1183    NULL,
1184    NULL
1185};
1186
1187PyMODINIT_FUNC
1188PyInit_zipimport(void)
1189{
1190    PyObject *mod;
1191
1192    if (PyType_Ready(&ZipImporter_Type) < 0)
1193        return NULL;
1194
1195    /* Correct directory separator */
1196    zip_searchorder[0].suffix[0] = SEP;
1197    zip_searchorder[1].suffix[0] = SEP;
1198    zip_searchorder[2].suffix[0] = SEP;
1199    if (Py_OptimizeFlag) {
1200        /* Reverse *.pyc and *.pyo */
1201        struct st_zip_searchorder tmp;
1202        tmp = zip_searchorder[0];
1203        zip_searchorder[0] = zip_searchorder[1];
1204        zip_searchorder[1] = tmp;
1205        tmp = zip_searchorder[3];
1206        zip_searchorder[3] = zip_searchorder[4];
1207        zip_searchorder[4] = tmp;
1208    }
1209
1210    mod = PyModule_Create(&zipimportmodule);
1211    if (mod == NULL)
1212        return NULL;
1213
1214    ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1215                                        PyExc_ImportError, NULL);
1216    if (ZipImportError == NULL)
1217        return NULL;
1218
1219    Py_INCREF(ZipImportError);
1220    if (PyModule_AddObject(mod, "ZipImportError",
1221                           ZipImportError) < 0)
1222        return NULL;
1223
1224    Py_INCREF(&ZipImporter_Type);
1225    if (PyModule_AddObject(mod, "zipimporter",
1226                           (PyObject *)&ZipImporter_Type) < 0)
1227        return NULL;
1228
1229    zip_directory_cache = PyDict_New();
1230    if (zip_directory_cache == NULL)
1231        return NULL;
1232    Py_INCREF(zip_directory_cache);
1233    if (PyModule_AddObject(mod, "_zip_directory_cache",
1234                           zip_directory_cache) < 0)
1235        return NULL;
1236    return mod;
1237}
1238