zipimport.c revision ced27516f5ff24d91382b5e9fb513f0682617a2e
#include "Python.h"
#include "structmember.h"
#include "osdefs.h"
#include "marshal.h"
#include <errno.h>
#include <time.h>
6
7
/* Bit flags stored in st_zip_searchorder.type.  IS_SOURCE is simply the
   absence of IS_BYTECODE; it exists so table entries read naturally. */
#define IS_SOURCE   0
#define IS_BYTECODE (1 << 0)
#define IS_PACKAGE  (1 << 1)

/* One candidate file suffix plus the IS_* flags that apply to it. */
struct st_zip_searchorder {
    char suffix[14];    /* "/__init__.pyc" is 13 characters + NUL */
    int type;           /* bitwise OR of IS_* flags */
};

/* zip_searchorder defines how we search for a module in the Zip
   archive: a package __init__ is tried first, then the non-package
   .pyc, .pyo and .py entries.  The .pyc and .pyo entries are swapped
   by initzipimport() if we run in optimized mode, and '/' is replaced
   by SEP there, which is why the table is not const. */
static struct st_zip_searchorder zip_searchorder[] = {
    /* package candidates */
    {"/__init__.pyc", IS_BYTECODE | IS_PACKAGE},
    {"/__init__.pyo", IS_BYTECODE | IS_PACKAGE},
    {"/__init__.py",  IS_SOURCE   | IS_PACKAGE},
    /* plain module candidates */
    {".pyc", IS_BYTECODE},
    {".pyo", IS_BYTECODE},
    {".py",  IS_SOURCE},
    /* sentinel: an empty suffix terminates the table */
    {"", 0}
};
31
32/* zipimporter object definition and support */
33
34typedef struct _zipimporter ZipImporter;
35
36struct _zipimporter {
37    PyObject_HEAD
38    PyObject *archive;  /* pathname of the Zip archive */
39    PyObject *prefix;   /* file prefix: "a/sub/directory/" */
40    PyObject *files;    /* dict with file info {path: toc_entry} */
41};
42
43static PyObject *ZipImportError;
44static PyObject *zip_directory_cache = NULL;
45
46// GOOGLE(nanzhang): Changed two functions below to be visible to launcher so
47// that launcher can access the zip metadata section.
48/* forward decls */
49PyObject *read_directory(const char *archive);
50PyObject *get_data(const char *archive, PyObject *toc_entry);
51static PyObject *get_module_code(ZipImporter *self, char *fullname,
52                                 int *p_ispackage, char **p_modpath);
53
54
/* True if op is a zipimporter instance or an instance of a subtype. */
#define ZipImporter_Check(op) (PyObject_TypeCheck((op), &ZipImporter_Type))
56
57
58/* zipimporter.__init__
59   Split the "subdirectory" from the Zip archive path, lookup a matching
60   entry in sys.path_importer_cache, fetch the file directory from there
61   if found, or else read it from the archive. */
62static int
63zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
64{
65    char *path, *p, *prefix, buf[MAXPATHLEN+2];
66    size_t len;
67
68    if (!_PyArg_NoKeywords("zipimporter()", kwds))
69        return -1;
70
71    if (!PyArg_ParseTuple(args, "s:zipimporter",
72                          &path))
73        return -1;
74
75    len = strlen(path);
76    if (len == 0) {
77        PyErr_SetString(ZipImportError, "archive path is empty");
78        return -1;
79    }
80    if (len >= MAXPATHLEN) {
81        PyErr_SetString(ZipImportError,
82                        "archive path too long");
83        return -1;
84    }
85    strcpy(buf, path);
86
87#ifdef ALTSEP
88    for (p = buf; *p; p++) {
89        if (*p == ALTSEP)
90            *p = SEP;
91    }
92#endif
93
94    path = NULL;
95    prefix = NULL;
96    for (;;) {
97#ifndef RISCOS
98        struct stat statbuf;
99        int rv;
100
101        rv = stat(buf, &statbuf);
102        if (rv == 0) {
103            /* it exists */
104            if (S_ISREG(statbuf.st_mode))
105                /* it's a file */
106                path = buf;
107            break;
108        }
109#else
110        if (object_exists(buf)) {
111            /* it exists */
112            if (isfile(buf))
113                /* it's a file */
114                path = buf;
115            break;
116        }
117#endif
118        /* back up one path element */
119        p = strrchr(buf, SEP);
120        if (prefix != NULL)
121            *prefix = SEP;
122        if (p == NULL)
123            break;
124        *p = '\0';
125        prefix = p;
126    }
127    if (path != NULL) {
128        PyObject *files;
129        files = PyDict_GetItemString(zip_directory_cache, path);
130        if (files == NULL) {
131            files = read_directory(buf);
132            if (files == NULL)
133                return -1;
134            if (PyDict_SetItemString(zip_directory_cache, path,
135                                     files) != 0)
136                return -1;
137        }
138        else
139            Py_INCREF(files);
140        self->files = files;
141    }
142    else {
143        PyErr_SetString(ZipImportError, "not a Zip file");
144        return -1;
145    }
146
147    if (prefix == NULL)
148        prefix = "";
149    else {
150        prefix++;
151        len = strlen(prefix);
152        if (prefix[len-1] != SEP) {
153            /* add trailing SEP */
154            prefix[len] = SEP;
155            prefix[len + 1] = '\0';
156        }
157    }
158
159    self->archive = PyString_FromString(buf);
160    if (self->archive == NULL)
161        return -1;
162
163    self->prefix = PyString_FromString(prefix);
164    if (self->prefix == NULL)
165        return -1;
166
167    return 0;
168}
169
170/* GC support. */
171static int
172zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
173{
174    ZipImporter *self = (ZipImporter *)obj;
175    Py_VISIT(self->files);
176    return 0;
177}
178
179static void
180zipimporter_dealloc(ZipImporter *self)
181{
182    PyObject_GC_UnTrack(self);
183    Py_XDECREF(self->archive);
184    Py_XDECREF(self->prefix);
185    Py_XDECREF(self->files);
186    Py_TYPE(self)->tp_free((PyObject *)self);
187}
188
189static PyObject *
190zipimporter_repr(ZipImporter *self)
191{
192    char buf[500];
193    char *archive = "???";
194    char *prefix = "";
195
196    if (self->archive != NULL && PyString_Check(self->archive))
197        archive = PyString_AsString(self->archive);
198    if (self->prefix != NULL && PyString_Check(self->prefix))
199        prefix = PyString_AsString(self->prefix);
200    if (prefix != NULL && *prefix)
201        PyOS_snprintf(buf, sizeof(buf),
202                      "<zipimporter object \"%.300s%c%.150s\">",
203                      archive, SEP, prefix);
204    else
205        PyOS_snprintf(buf, sizeof(buf),
206                      "<zipimporter object \"%.300s\">",
207                      archive);
208    return PyString_FromString(buf);
209}
210
/* Equivalent of fullname.split(".")[-1]: return a pointer into fullname
   just past its last '.', or fullname itself when it contains no dot. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
222
223/* Given a (sub)modulename, write the potential file path in the
224   archive (without extension) to the path buffer. Return the
225   length of the resulting string. */
226static int
227make_filename(char *prefix, char *name, char *path)
228{
229    size_t len;
230    char *p;
231
232    len = strlen(prefix);
233
234    /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
235    if (len + strlen(name) + 13 >= MAXPATHLEN) {
236        PyErr_SetString(ZipImportError, "path too long");
237        return -1;
238    }
239
240    strcpy(path, prefix);
241    strcpy(path + len, name);
242    for (p = path + len; *p; p++) {
243        if (*p == '.')
244            *p = SEP;
245    }
246    len += strlen(name);
247    assert(len < INT_MAX);
248    return (int)len;
249}
250
/* Result of get_module_info(). */
enum zi_module_info {
    MI_ERROR = 0,       /* path could not be built; exception is set */
    MI_NOT_FOUND = 1,   /* no matching entry in the archive */
    MI_MODULE = 2,      /* found as a plain module */
    MI_PACKAGE = 3      /* found as a package (__init__ entry) */
};
257
258/* Return some information about a module. */
259static enum zi_module_info
260get_module_info(ZipImporter *self, char *fullname)
261{
262    char *subname, path[MAXPATHLEN + 1];
263    int len;
264    struct st_zip_searchorder *zso;
265
266    subname = get_subname(fullname);
267
268    len = make_filename(PyString_AsString(self->prefix), subname, path);
269    if (len < 0)
270        return MI_ERROR;
271
272    for (zso = zip_searchorder; *zso->suffix; zso++) {
273        strcpy(path + len, zso->suffix);
274        if (PyDict_GetItemString(self->files, path) != NULL) {
275            if (zso->type & IS_PACKAGE)
276                return MI_PACKAGE;
277            else
278                return MI_MODULE;
279        }
280    }
281    return MI_NOT_FOUND;
282}
283
284/* Check whether we can satisfy the import of the module named by
285   'fullname'. Return self if we can, None if we can't. */
286static PyObject *
287zipimporter_find_module(PyObject *obj, PyObject *args)
288{
289    ZipImporter *self = (ZipImporter *)obj;
290    PyObject *path = NULL;
291    char *fullname;
292    enum zi_module_info mi;
293
294    if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
295                          &fullname, &path))
296        return NULL;
297
298    mi = get_module_info(self, fullname);
299    if (mi == MI_ERROR)
300        return NULL;
301    if (mi == MI_NOT_FOUND) {
302        Py_INCREF(Py_None);
303        return Py_None;
304    }
305    Py_INCREF(self);
306    return (PyObject *)self;
307}
308
309/* Load and return the module named by 'fullname'. */
310static PyObject *
311zipimporter_load_module(PyObject *obj, PyObject *args)
312{
313    ZipImporter *self = (ZipImporter *)obj;
314    PyObject *code, *mod, *dict;
315    char *fullname, *modpath;
316    int ispackage;
317
318    if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
319                          &fullname))
320        return NULL;
321
322    code = get_module_code(self, fullname, &ispackage, &modpath);
323    if (code == NULL)
324        return NULL;
325
326    mod = PyImport_AddModule(fullname);
327    if (mod == NULL) {
328        Py_DECREF(code);
329        return NULL;
330    }
331    dict = PyModule_GetDict(mod);
332
333    /* mod.__loader__ = self */
334    if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
335        goto error;
336
337    if (ispackage) {
338        /* add __path__ to the module *before* the code gets
339           executed */
340        PyObject *pkgpath, *fullpath;
341        char *prefix = PyString_AsString(self->prefix);
342        char *subname = get_subname(fullname);
343        int err;
344
345        fullpath = PyString_FromFormat("%s%c%s%s",
346                                PyString_AsString(self->archive),
347                                SEP,
348                                *prefix ? prefix : "",
349                                subname);
350        if (fullpath == NULL)
351            goto error;
352
353        pkgpath = Py_BuildValue("[O]", fullpath);
354        Py_DECREF(fullpath);
355        if (pkgpath == NULL)
356            goto error;
357        err = PyDict_SetItemString(dict, "__path__", pkgpath);
358        Py_DECREF(pkgpath);
359        if (err != 0)
360            goto error;
361    }
362    mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
363    Py_DECREF(code);
364    if (Py_VerboseFlag)
365        PySys_WriteStderr("import %s # loaded from Zip %s\n",
366                          fullname, modpath);
367    return mod;
368error:
369    Py_DECREF(code);
370    Py_DECREF(mod);
371    return NULL;
372}
373
374/* Return a string matching __file__ for the named module */
375static PyObject *
376zipimporter_get_filename(PyObject *obj, PyObject *args)
377{
378    ZipImporter *self = (ZipImporter *)obj;
379    PyObject *code;
380    char *fullname, *modpath;
381    int ispackage;
382
383    if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename",
384                         &fullname))
385        return NULL;
386
387    /* Deciding the filename requires working out where the code
388       would come from if the module was actually loaded */
389    code = get_module_code(self, fullname, &ispackage, &modpath);
390    if (code == NULL)
391        return NULL;
392    Py_DECREF(code); /* Only need the path info */
393
394    return PyString_FromString(modpath);
395}
396
397/* Return a bool signifying whether the module is a package or not. */
398static PyObject *
399zipimporter_is_package(PyObject *obj, PyObject *args)
400{
401    ZipImporter *self = (ZipImporter *)obj;
402    char *fullname;
403    enum zi_module_info mi;
404
405    if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
406                          &fullname))
407        return NULL;
408
409    mi = get_module_info(self, fullname);
410    if (mi == MI_ERROR)
411        return NULL;
412    if (mi == MI_NOT_FOUND) {
413        PyErr_Format(ZipImportError, "can't find module '%.200s'",
414                     fullname);
415        return NULL;
416    }
417    return PyBool_FromLong(mi == MI_PACKAGE);
418}
419
420static PyObject *
421zipimporter_get_data(PyObject *obj, PyObject *args)
422{
423    ZipImporter *self = (ZipImporter *)obj;
424    char *path;
425#ifdef ALTSEP
426    char *p, buf[MAXPATHLEN + 1];
427#endif
428    PyObject *toc_entry;
429    Py_ssize_t len;
430
431    if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
432        return NULL;
433
434#ifdef ALTSEP
435    if (strlen(path) >= MAXPATHLEN) {
436        PyErr_SetString(ZipImportError, "path too long");
437        return NULL;
438    }
439    strcpy(buf, path);
440    for (p = buf; *p; p++) {
441        if (*p == ALTSEP)
442            *p = SEP;
443    }
444    path = buf;
445#endif
446    len = PyString_Size(self->archive);
447    if ((size_t)len < strlen(path) &&
448        strncmp(path, PyString_AsString(self->archive), len) == 0 &&
449        path[len] == SEP) {
450        path = path + len + 1;
451    }
452
453    toc_entry = PyDict_GetItemString(self->files, path);
454    if (toc_entry == NULL) {
455        PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
456        return NULL;
457    }
458    return get_data(PyString_AsString(self->archive), toc_entry);
459}
460
461static PyObject *
462zipimporter_get_code(PyObject *obj, PyObject *args)
463{
464    ZipImporter *self = (ZipImporter *)obj;
465    char *fullname;
466
467    if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
468        return NULL;
469
470    return get_module_code(self, fullname, NULL, NULL);
471}
472
473static PyObject *
474zipimporter_get_source(PyObject *obj, PyObject *args)
475{
476    ZipImporter *self = (ZipImporter *)obj;
477    PyObject *toc_entry;
478    char *fullname, *subname, path[MAXPATHLEN+1];
479    int len;
480    enum zi_module_info mi;
481
482    if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
483        return NULL;
484
485    mi = get_module_info(self, fullname);
486    if (mi == MI_ERROR)
487        return NULL;
488    if (mi == MI_NOT_FOUND) {
489        PyErr_Format(ZipImportError, "can't find module '%.200s'",
490                     fullname);
491        return NULL;
492    }
493    subname = get_subname(fullname);
494
495    len = make_filename(PyString_AsString(self->prefix), subname, path);
496    if (len < 0)
497        return NULL;
498
499    if (mi == MI_PACKAGE) {
500        path[len] = SEP;
501        strcpy(path + len + 1, "__init__.py");
502    }
503    else
504        strcpy(path + len, ".py");
505
506    toc_entry = PyDict_GetItemString(self->files, path);
507    if (toc_entry != NULL)
508        return get_data(PyString_AsString(self->archive), toc_entry);
509
510    /* we have the module, but no source */
511    Py_INCREF(Py_None);
512    return Py_None;
513}
514
515PyDoc_STRVAR(doc_find_module,
516"find_module(fullname, path=None) -> self or None.\n\
517\n\
518Search for a module specified by 'fullname'. 'fullname' must be the\n\
519fully qualified (dotted) module name. It returns the zipimporter\n\
520instance itself if the module was found, or None if it wasn't.\n\
521The optional 'path' argument is ignored -- it's there for compatibility\n\
522with the importer protocol.");
523
524PyDoc_STRVAR(doc_load_module,
525"load_module(fullname) -> module.\n\
526\n\
527Load the module specified by 'fullname'. 'fullname' must be the\n\
528fully qualified (dotted) module name. It returns the imported\n\
529module, or raises ZipImportError if it wasn't found.");
530
531PyDoc_STRVAR(doc_get_data,
532"get_data(pathname) -> string with file data.\n\
533\n\
534Return the data associated with 'pathname'. Raise IOError if\n\
535the file wasn't found.");
536
537PyDoc_STRVAR(doc_is_package,
538"is_package(fullname) -> bool.\n\
539\n\
540Return True if the module specified by fullname is a package.\n\
541Raise ZipImportError if the module couldn't be found.");
542
543PyDoc_STRVAR(doc_get_code,
544"get_code(fullname) -> code object.\n\
545\n\
546Return the code object for the specified module. Raise ZipImportError\n\
547if the module couldn't be found.");
548
549PyDoc_STRVAR(doc_get_source,
550"get_source(fullname) -> source string.\n\
551\n\
552Return the source code for the specified module. Raise ZipImportError\n\
553if the module couldn't be found, return None if the archive does\n\
554contain the module, but has no source for it.");
555
556
557PyDoc_STRVAR(doc_get_filename,
558"get_filename(fullname) -> filename string.\n\
559\n\
560Return the filename for the specified module.");
561
562static PyMethodDef zipimporter_methods[] = {
563    {"find_module", zipimporter_find_module, METH_VARARGS,
564     doc_find_module},
565    {"load_module", zipimporter_load_module, METH_VARARGS,
566     doc_load_module},
567    {"get_data", zipimporter_get_data, METH_VARARGS,
568     doc_get_data},
569    {"get_code", zipimporter_get_code, METH_VARARGS,
570     doc_get_code},
571    {"get_source", zipimporter_get_source, METH_VARARGS,
572     doc_get_source},
573    {"get_filename", zipimporter_get_filename, METH_VARARGS,
574     doc_get_filename},
575    {"is_package", zipimporter_is_package, METH_VARARGS,
576     doc_is_package},
577    {NULL,              NULL}   /* sentinel */
578};
579
580static PyMemberDef zipimporter_members[] = {
581    {"archive",  T_OBJECT, offsetof(ZipImporter, archive),  READONLY},
582    {"prefix",   T_OBJECT, offsetof(ZipImporter, prefix),   READONLY},
583    {"_files",   T_OBJECT, offsetof(ZipImporter, files),    READONLY},
584    {NULL}
585};
586
587PyDoc_STRVAR(zipimporter_doc,
588"zipimporter(archivepath) -> zipimporter object\n\
589\n\
590Create a new zipimporter instance. 'archivepath' must be a path to\n\
591a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
592'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
593valid directory inside the archive.\n\
594\n\
595'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
596archive.\n\
597\n\
598The 'archive' attribute of zipimporter objects contains the name of the\n\
599zipfile targeted.");
600
601#define DEFERRED_ADDRESS(ADDR) 0
602
603static PyTypeObject ZipImporter_Type = {
604    PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
605    "zipimport.zipimporter",
606    sizeof(ZipImporter),
607    0,                                          /* tp_itemsize */
608    (destructor)zipimporter_dealloc,            /* tp_dealloc */
609    0,                                          /* tp_print */
610    0,                                          /* tp_getattr */
611    0,                                          /* tp_setattr */
612    0,                                          /* tp_compare */
613    (reprfunc)zipimporter_repr,                 /* tp_repr */
614    0,                                          /* tp_as_number */
615    0,                                          /* tp_as_sequence */
616    0,                                          /* tp_as_mapping */
617    0,                                          /* tp_hash */
618    0,                                          /* tp_call */
619    0,                                          /* tp_str */
620    PyObject_GenericGetAttr,                    /* tp_getattro */
621    0,                                          /* tp_setattro */
622    0,                                          /* tp_as_buffer */
623    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
624        Py_TPFLAGS_HAVE_GC,                     /* tp_flags */
625    zipimporter_doc,                            /* tp_doc */
626    zipimporter_traverse,                       /* tp_traverse */
627    0,                                          /* tp_clear */
628    0,                                          /* tp_richcompare */
629    0,                                          /* tp_weaklistoffset */
630    0,                                          /* tp_iter */
631    0,                                          /* tp_iternext */
632    zipimporter_methods,                        /* tp_methods */
633    zipimporter_members,                        /* tp_members */
634    0,                                          /* tp_getset */
635    0,                                          /* tp_base */
636    0,                                          /* tp_dict */
637    0,                                          /* tp_descr_get */
638    0,                                          /* tp_descr_set */
639    0,                                          /* tp_dictoffset */
640    (initproc)zipimporter_init,                 /* tp_init */
641    PyType_GenericAlloc,                        /* tp_alloc */
642    PyType_GenericNew,                          /* tp_new */
643    PyObject_GC_Del,                            /* tp_free */
644};
645
646
647/* implementation */
648
/* Decode the first 4 bytes of buf as a little-endian unsigned integer
   (buf[0] is the least significant byte).  This partially reimplements
   marshal.c:r_long(). */
static unsigned int
get_uint32(const unsigned char *buf)
{
    unsigned int value = 0;
    int i;

    /* fold from the most significant byte down to the least */
    for (i = 3; i >= 0; i--)
        value = (value << 8) | (unsigned int)buf[i];
    return value;
}
662
/* Decode the first 2 bytes of buf as a little-endian unsigned short
   (buf[0] is the low byte).  This partially reimplements
   marshal.c:r_short(). */
static unsigned short
get_uint16(const unsigned char *buf)
{
    unsigned int low = buf[0];
    unsigned int high = buf[1];

    return (unsigned short)(low | (high << 8));
}
674
675static void
676set_file_error(const char *archive, int eof)
677{
678    if (eof) {
679        PyErr_SetString(PyExc_EOFError, "EOF read where not expected");
680    }
681    else {
682        PyErr_SetFromErrnoWithFilename(PyExc_IOError, archive);
683    }
684}
685
686/*
687   read_directory(archive) -> files dict (new reference)
688
689   Given a path to a Zip archive, build a dict, mapping file names
690   (local to the archive, using SEP as a separator) to toc entries.
691
692   A toc_entry is a tuple:
693
694   (__file__,      # value to use for __file__, available for all files
695    compress,      # compression kind; 0 for uncompressed
696    data_size,     # size of compressed data on disk
697    file_size,     # size of decompressed data
698    file_offset,   # offset of file header from start of archive
699    time,          # mod time of file (in dos format)
700    date,          # mod data of file (in dos format)
701    crc,           # crc checksum of the data
702   )
703
704   Directories can be recognized by the trailing SEP in the name,
705   data_size and file_offset are 0.
706*/
707PyObject *
708read_directory(const char *archive)
709{
710    PyObject *files = NULL;
711    FILE *fp;
712    unsigned short compress, time, date, name_size;
713    unsigned int crc, data_size, file_size, header_size, header_offset;
714    unsigned long file_offset, header_position;
715    unsigned long arc_offset;  /* Absolute offset to start of the zip-archive. */
716    unsigned int count, i;
717    unsigned char buffer[46];
718    size_t length;
719    char path[MAXPATHLEN + 5];
720    char name[MAXPATHLEN + 5];
721    const char *errmsg = NULL;
722
723    if (strlen(archive) > MAXPATHLEN) {
724        PyErr_SetString(PyExc_OverflowError,
725                        "Zip path name is too long");
726        return NULL;
727    }
728    strcpy(path, archive);
729
730    fp = fopen(archive, "rb");
731    if (fp == NULL) {
732        PyErr_Format(ZipImportError, "can't open Zip file: "
733                     "'%.200s'", archive);
734        return NULL;
735    }
736
737    if (fseek(fp, -22, SEEK_END) == -1) {
738        goto file_error;
739    }
740    header_position = (unsigned long)ftell(fp);
741    if (header_position == (unsigned long)-1) {
742        goto file_error;
743    }
744    assert(header_position <= (unsigned long)LONG_MAX);
745    if (fread(buffer, 1, 22, fp) != 22) {
746        goto file_error;
747    }
748    if (get_uint32(buffer) != 0x06054B50u) {
749        /* Bad: End of Central Dir signature */
750        errmsg = "not a Zip file";
751        goto invalid_header;
752    }
753
754    header_size = get_uint32(buffer + 12);
755    header_offset = get_uint32(buffer + 16);
756    if (header_position < header_size) {
757        errmsg = "bad central directory size";
758        goto invalid_header;
759    }
760    if (header_position < header_offset) {
761        errmsg = "bad central directory offset";
762        goto invalid_header;
763    }
764    if (header_position - header_size < header_offset) {
765        errmsg = "bad central directory size or offset";
766        goto invalid_header;
767    }
768    header_position -= header_size;
769    arc_offset = header_position - header_offset;
770
771    files = PyDict_New();
772    if (files == NULL) {
773        goto error;
774    }
775
776    length = (long)strlen(path);
777    path[length] = SEP;
778
779    /* Start of Central Directory */
780    count = 0;
781    if (fseek(fp, (long)header_position, 0) == -1) {
782        goto file_error;
783    }
784    for (;;) {
785        PyObject *t;
786        size_t n;
787        int err;
788
789        n = fread(buffer, 1, 46, fp);
790        if (n < 4) {
791            goto eof_error;
792        }
793        /* Start of file header */
794        if (get_uint32(buffer) != 0x02014B50u) {
795            break;              /* Bad: Central Dir File Header */
796        }
797        if (n != 46) {
798            goto eof_error;
799        }
800        compress = get_uint16(buffer + 10);
801        time = get_uint16(buffer + 12);
802        date = get_uint16(buffer + 14);
803        crc = get_uint32(buffer + 16);
804        data_size = get_uint32(buffer + 20);
805        file_size = get_uint32(buffer + 24);
806        name_size = get_uint16(buffer + 28);
807        header_size = (unsigned int)name_size +
808           get_uint16(buffer + 30) /* extra field */ +
809           get_uint16(buffer + 32) /* comment */;
810
811        file_offset = get_uint32(buffer + 42);
812        if (file_offset > header_offset) {
813            errmsg = "bad local header offset";
814            goto invalid_header;
815        }
816        file_offset += arc_offset;
817
818        if (name_size > MAXPATHLEN) {
819            name_size = MAXPATHLEN;
820        }
821        if (fread(name, 1, name_size, fp) != name_size) {
822            goto file_error;
823        }
824        name[name_size] = '\0';  /* Add terminating null byte */
825        if (SEP != '/') {
826            for (i = 0; i < name_size; i++) {
827                if (name[i] == '/') {
828                    name[i] = SEP;
829                }
830            }
831        }
832        /* Skip the rest of the header.
833         * On Windows, calling fseek to skip over the fields we don't use is
834         * slower than reading the data because fseek flushes stdio's
835         * internal buffers.  See issue #8745. */
836        assert(header_size <= 3*0xFFFFu);
837        for (i = name_size; i < header_size; i++) {
838            if (getc(fp) == EOF) {
839                goto file_error;
840            }
841        }
842
843        strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
844
845        t = Py_BuildValue("sHIIkHHI", path, compress, data_size,
846                          file_size, file_offset, time, date, crc);
847        if (t == NULL) {
848            goto error;
849        }
850        err = PyDict_SetItemString(files, name, t);
851        Py_DECREF(t);
852        if (err != 0) {
853            goto error;
854        }
855        count++;
856    }
857    fclose(fp);
858    if (Py_VerboseFlag) {
859        PySys_WriteStderr("# zipimport: found %u names in %.200s\n",
860                           count, archive);
861    }
862    return files;
863
864eof_error:
865    set_file_error(archive, !ferror(fp));
866    goto error;
867
868file_error:
869    PyErr_Format(ZipImportError, "can't read Zip file: %.200s", archive);
870    goto error;
871
872invalid_header:
873    assert(errmsg != NULL);
874    PyErr_Format(ZipImportError, "%s: %.200s", errmsg, archive);
875    goto error;
876
877error:
878    fclose(fp);
879    Py_XDECREF(files);
880    return NULL;
881}
882
883/* Return the zlib.decompress function object, or NULL if zlib couldn't
884   be imported. The function is cached when found, so subsequent calls
885   don't import zlib again. */
886static PyObject *
887get_decompress_func(void)
888{
889    static int importing_zlib = 0;
890    PyObject *zlib;
891    PyObject *decompress;
892
893    if (importing_zlib != 0)
894        /* Someone has a zlib.py[co] in their Zip file;
895           let's avoid a stack overflow. */
896        return NULL;
897    importing_zlib = 1;
898    zlib = PyImport_ImportModuleNoBlock("zlib");
899    importing_zlib = 0;
900    if (zlib != NULL) {
901        decompress = PyObject_GetAttrString(zlib,
902                                            "decompress");
903        Py_DECREF(zlib);
904    }
905    else {
906        PyErr_Clear();
907        decompress = NULL;
908    }
909    if (Py_VerboseFlag)
910        PySys_WriteStderr("# zipimport: zlib %s\n",
911            zlib != NULL ? "available": "UNAVAILABLE");
912    return decompress;
913}
914
915/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
916   data as a new reference. */
917PyObject *
918get_data(const char *archive, PyObject *toc_entry)
919{
920    PyObject *raw_data = NULL, *data, *decompress;
921    char *buf;
922    FILE *fp;
923    const char *datapath;
924    unsigned short compress, time, date;
925    unsigned int crc;
926    Py_ssize_t data_size, file_size;
927    long file_offset, header_size;
928    unsigned char buffer[30];
929    const char *errmsg = NULL;
930
931    if (!PyArg_ParseTuple(toc_entry, "sHnnlHHI", &datapath, &compress,
932                          &data_size, &file_size, &file_offset, &time,
933                          &date, &crc)) {
934        return NULL;
935    }
936    if (data_size < 0) {
937        PyErr_Format(ZipImportError, "negative data size");
938        return NULL;
939    }
940
941    fp = fopen(archive, "rb");
942    if (!fp) {
943        PyErr_Format(PyExc_IOError,
944           "zipimport: can not open file %s", archive);
945        return NULL;
946    }
947
948    /* Check to make sure the local file header is correct */
949    if (fseek(fp, file_offset, 0) == -1) {
950        goto file_error;
951    }
952    if (fread(buffer, 1, 30, fp) != 30) {
953        goto eof_error;
954    }
955    if (get_uint32(buffer) != 0x04034B50u) {
956        /* Bad: Local File Header */
957        errmsg = "bad local file header";
958        goto invalid_header;
959    }
960
961    header_size = (unsigned int)30 +
962        get_uint16(buffer + 26) /* file name */ +
963        get_uint16(buffer + 28) /* extra field */;
964    if (file_offset > LONG_MAX - header_size) {
965        errmsg = "bad local file header size";
966        goto invalid_header;
967    }
968    file_offset += header_size;  /* Start of file data */
969
970    if (data_size > LONG_MAX - 1) {
971        fclose(fp);
972        PyErr_NoMemory();
973        return NULL;
974    }
975    raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
976                                          data_size : data_size + 1);
977
978    if (raw_data == NULL) {
979        goto error;
980    }
981    buf = PyString_AsString(raw_data);
982
983    if (fseek(fp, file_offset, 0) == -1) {
984        goto file_error;
985    }
986    if (fread(buf, 1, data_size, fp) != (size_t)data_size) {
987        PyErr_SetString(PyExc_IOError,
988                        "zipimport: can't read data");
989        goto error;
990    }
991
992    fclose(fp);
993    fp = NULL;
994
995    if (compress != 0) {
996        buf[data_size] = 'Z';  /* saw this in zipfile.py */
997        data_size++;
998    }
999    buf[data_size] = '\0';
1000
1001    if (compress == 0)  /* data is not compressed */
1002        return raw_data;
1003
1004    /* Decompress with zlib */
1005    decompress = get_decompress_func();
1006    if (decompress == NULL) {
1007        PyErr_SetString(ZipImportError,
1008                        "can't decompress data; "
1009                        "zlib not available");
1010        goto error;
1011    }
1012    data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
1013    Py_DECREF(decompress);
1014    Py_DECREF(raw_data);
1015    return data;
1016
1017eof_error:
1018    set_file_error(archive, !ferror(fp));
1019    goto error;
1020
1021file_error:
1022    PyErr_Format(ZipImportError, "can't read Zip file: %.200s", archive);
1023    goto error;
1024
1025invalid_header:
1026    assert(errmsg != NULL);
1027    PyErr_Format(ZipImportError, "%s: %.200s", errmsg, archive);
1028    goto error;
1029
1030error:
1031    if (fp != NULL) {
1032        fclose(fp);
1033    }
1034    Py_XDECREF(raw_data);
1035    return NULL;
1036}
1037
/* Lenient date/time comparison function. The mtime recorded in the
   archive has a lower precision than the mtime stored in a .pyc, so
   two time stamps count as equal when they differ by at most one
   second.

   t1, t2: the two time stamps to compare.
   Returns 1 if the values are at most one second apart, otherwise 0.

   The difference t1 - t2 is never formed directly: only the larger
   value is decremented.  That keeps the result correct when time_t is
   an unsigned type and avoids signed overflow when the two values are
   very far apart. */
static int
eq_mtime(time_t t1, time_t t2)
{
    /* dostime only stores even seconds, so be lenient */
    if (t1 > t2)
        return t1 - 1 <= t2;
    if (t2 > t1)
        return t2 - 1 <= t1;
    return 1;
}
1050
1051/* Given the contents of a .py[co] file in a buffer, unmarshal the data
1052   and return the code object. Return None if it the magic word doesn't
1053   match (we do this instead of raising an exception as we fall back
1054   to .py if available and we don't want to mask other errors).
1055   Returns a new reference. */
1056static PyObject *
1057unmarshal_code(const char *pathname, PyObject *data, time_t mtime)
1058{
1059    PyObject *code;
1060    unsigned char *buf = (unsigned char *)PyString_AsString(data);
1061    Py_ssize_t size = PyString_Size(data);
1062
1063    if (size < 8) {
1064        PyErr_SetString(ZipImportError,
1065                        "bad pyc data");
1066        return NULL;
1067    }
1068
1069    if (get_uint32(buf) != (unsigned int)PyImport_GetMagicNumber()) {
1070        if (Py_VerboseFlag) {
1071            PySys_WriteStderr("# %s has bad magic\n",
1072                              pathname);
1073        }
1074        Py_INCREF(Py_None);
1075        return Py_None;  /* signal caller to try alternative */
1076    }
1077
1078    if (mtime != 0 && !eq_mtime(get_uint32(buf + 4), mtime)) {
1079        if (Py_VerboseFlag) {
1080            PySys_WriteStderr("# %s has bad mtime\n",
1081                              pathname);
1082        }
1083        Py_INCREF(Py_None);
1084        return Py_None;  /* signal caller to try alternative */
1085    }
1086
1087    code = PyMarshal_ReadObjectFromString((char *)buf + 8, size - 8);
1088    if (code == NULL) {
1089        return NULL;
1090    }
1091    if (!PyCode_Check(code)) {
1092        Py_DECREF(code);
1093        PyErr_Format(PyExc_TypeError,
1094             "compiled module %.200s is not a code object",
1095             pathname);
1096        return NULL;
1097    }
1098    return code;
1099}
1100
1101/* Replace any occurrences of "\r\n?" in the input string with "\n".
1102   This converts DOS and Mac line endings to Unix line endings.
1103   Also append a trailing "\n" to be compatible with
1104   PyParser_SimpleParseFile(). Returns a new reference. */
1105static PyObject *
1106normalize_line_endings(PyObject *source)
1107{
1108    char *buf, *q, *p = PyString_AsString(source);
1109    PyObject *fixed_source;
1110
1111    if (!p)
1112        return NULL;
1113
1114    /* one char extra for trailing \n and one for terminating \0 */
1115    buf = (char *)PyMem_Malloc(PyString_Size(source) + 2);
1116    if (buf == NULL) {
1117        PyErr_SetString(PyExc_MemoryError,
1118                        "zipimport: no memory to allocate "
1119                        "source buffer");
1120        return NULL;
1121    }
1122    /* replace "\r\n?" by "\n" */
1123    for (q = buf; *p != '\0'; p++) {
1124        if (*p == '\r') {
1125            *q++ = '\n';
1126            if (*(p + 1) == '\n')
1127                p++;
1128        }
1129        else
1130            *q++ = *p;
1131    }
1132    *q++ = '\n';  /* add trailing \n */
1133    *q = '\0';
1134    fixed_source = PyString_FromString(buf);
1135    PyMem_Free(buf);
1136    return fixed_source;
1137}
1138
1139/* Given a string buffer containing Python source code, compile it
1140   return and return a code object as a new reference. */
1141static PyObject *
1142compile_source(char *pathname, PyObject *source)
1143{
1144    PyObject *code, *fixed_source;
1145
1146    fixed_source = normalize_line_endings(source);
1147    if (fixed_source == NULL)
1148        return NULL;
1149
1150    code = Py_CompileString(PyString_AsString(fixed_source), pathname,
1151                            Py_file_input);
1152    Py_DECREF(fixed_source);
1153    return code;
1154}
1155
/* Convert the packed date/time values found in the Zip archive to a
   value that can be compared with the time stamp stored in .pyc files.

   dostime: bits 0-4 hold seconds / 2, bits 5-10 minutes, bits 11-15 hours.
   dosdate: bits 0-4 hold the day of the month, bits 5-8 the month
            (1-based), bits 9-15 the year counted from 1980.

   The broken-down time is handed to mktime(), so it is interpreted as
   local time; the return value is whatever mktime() yields for it. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm fields;
    int second = (dostime & 0x1f) * 2;
    int minute = (dostime >> 5) & 0x3f;
    int hour = (dostime >> 11) & 0x1f;
    int day = dosdate & 0x1f;
    int month = (dosdate >> 5) & 0x0f;
    int year = (dosdate >> 9) & 0x7f;

    memset(&fields, 0, sizeof(fields));
    fields.tm_sec = second;
    fields.tm_min = minute;
    fields.tm_hour = hour;
    fields.tm_mday = day;
    fields.tm_mon = month - 1;   /* struct tm months start at 0 */
    fields.tm_year = year + 80;  /* 1980-based -> 1900-based */
    fields.tm_isdst = -1;        /* wday/yday is ignored */

    return mktime(&fields);
}
1175
1176/* Given a path to a .pyc or .pyo file in the archive, return the
1177   modification time of the matching .py file, or 0 if no source
1178   is available. */
1179static time_t
1180get_mtime_of_source(ZipImporter *self, char *path)
1181{
1182    PyObject *toc_entry;
1183    time_t mtime = 0;
1184    Py_ssize_t lastchar = strlen(path) - 1;
1185    char savechar = path[lastchar];
1186    path[lastchar] = '\0';  /* strip 'c' or 'o' from *.py[co] */
1187    toc_entry = PyDict_GetItemString(self->files, path);
1188    if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1189        PyTuple_Size(toc_entry) == 8) {
1190        /* fetch the time stamp of the .py file for comparison
1191           with an embedded pyc time stamp */
1192        int time, date;
1193        time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1194        date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1195        mtime = parse_dostime(time, date);
1196    }
1197    path[lastchar] = savechar;
1198    return mtime;
1199}
1200
1201/* Return the code object for the module named by 'fullname' from the
1202   Zip archive as a new reference. */
1203static PyObject *
1204get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1205                   time_t mtime, PyObject *toc_entry)
1206{
1207    PyObject *data, *code;
1208    char *modpath;
1209    char *archive = PyString_AsString(self->archive);
1210
1211    if (archive == NULL)
1212        return NULL;
1213
1214    data = get_data(archive, toc_entry);
1215    if (data == NULL)
1216        return NULL;
1217
1218    modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1219
1220    if (isbytecode) {
1221        code = unmarshal_code(modpath, data, mtime);
1222    }
1223    else {
1224        code = compile_source(modpath, data);
1225    }
1226    Py_DECREF(data);
1227    return code;
1228}
1229
1230/* Get the code object associated with the module specified by
1231   'fullname'. */
1232static PyObject *
1233get_module_code(ZipImporter *self, char *fullname,
1234                int *p_ispackage, char **p_modpath)
1235{
1236    PyObject *toc_entry;
1237    char *subname, path[MAXPATHLEN + 1];
1238    int len;
1239    struct st_zip_searchorder *zso;
1240
1241    subname = get_subname(fullname);
1242
1243    len = make_filename(PyString_AsString(self->prefix), subname, path);
1244    if (len < 0)
1245        return NULL;
1246
1247    for (zso = zip_searchorder; *zso->suffix; zso++) {
1248        PyObject *code = NULL;
1249
1250        strcpy(path + len, zso->suffix);
1251        if (Py_VerboseFlag > 1)
1252            PySys_WriteStderr("# trying %s%c%s\n",
1253                              PyString_AsString(self->archive),
1254                              SEP, path);
1255        toc_entry = PyDict_GetItemString(self->files, path);
1256        if (toc_entry != NULL) {
1257            time_t mtime = 0;
1258            int ispackage = zso->type & IS_PACKAGE;
1259            int isbytecode = zso->type & IS_BYTECODE;
1260
1261            if (isbytecode)
1262                mtime = get_mtime_of_source(self, path);
1263            if (p_ispackage != NULL)
1264                *p_ispackage = ispackage;
1265            code = get_code_from_data(self, ispackage,
1266                                      isbytecode, mtime,
1267                                      toc_entry);
1268            if (code == Py_None) {
1269                /* bad magic number or non-matching mtime
1270                   in byte code, try next */
1271                Py_DECREF(code);
1272                continue;
1273            }
1274            if (code != NULL && p_modpath != NULL)
1275                *p_modpath = PyString_AsString(
1276                    PyTuple_GetItem(toc_entry, 0));
1277            return code;
1278        }
1279    }
1280    PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1281    return NULL;
1282}
1283
1284
1285/* Module init */
1286
1287PyDoc_STRVAR(zipimport_doc,
1288"zipimport provides support for importing Python modules from Zip archives.\n\
1289\n\
1290This module exports three objects:\n\
1291- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1292- ZipImportError: exception raised by zipimporter objects. It's a\n\
1293  subclass of ImportError, so it can be caught as ImportError, too.\n\
1294- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1295  info dicts, as used in zipimporter._files.\n\
1296\n\
1297It is usually not needed to use the zipimport module explicitly; it is\n\
1298used by the builtin import mechanism for sys.path items that are paths\n\
1299to Zip archives.");
1300
1301PyMODINIT_FUNC
1302initzipimport(void)
1303{
1304    PyObject *mod;
1305
1306    if (PyType_Ready(&ZipImporter_Type) < 0)
1307        return;
1308
1309    /* Correct directory separator */
1310    zip_searchorder[0].suffix[0] = SEP;
1311    zip_searchorder[1].suffix[0] = SEP;
1312    zip_searchorder[2].suffix[0] = SEP;
1313    if (Py_OptimizeFlag) {
1314        /* Reverse *.pyc and *.pyo */
1315        struct st_zip_searchorder tmp;
1316        tmp = zip_searchorder[0];
1317        zip_searchorder[0] = zip_searchorder[1];
1318        zip_searchorder[1] = tmp;
1319        tmp = zip_searchorder[3];
1320        zip_searchorder[3] = zip_searchorder[4];
1321        zip_searchorder[4] = tmp;
1322    }
1323
1324    mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1325                         NULL, PYTHON_API_VERSION);
1326    if (mod == NULL)
1327        return;
1328
1329    ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1330                                        PyExc_ImportError, NULL);
1331    if (ZipImportError == NULL)
1332        return;
1333
1334    Py_INCREF(ZipImportError);
1335    if (PyModule_AddObject(mod, "ZipImportError",
1336                           ZipImportError) < 0)
1337        return;
1338
1339    Py_INCREF(&ZipImporter_Type);
1340    if (PyModule_AddObject(mod, "zipimporter",
1341                           (PyObject *)&ZipImporter_Type) < 0)
1342        return;
1343
1344    zip_directory_cache = PyDict_New();
1345    if (zip_directory_cache == NULL)
1346        return;
1347    Py_INCREF(zip_directory_cache);
1348    if (PyModule_AddObject(mod, "_zip_directory_cache",
1349                           zip_directory_cache) < 0)
1350        return;
1351}
1352