1#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
5#include <time.h>
6
7
8#define IS_SOURCE   0x0
9#define IS_BYTECODE 0x1
10#define IS_PACKAGE  0x2
11
/* One entry of the module search order: a filename suffix to try and a
   bitmask of IS_* flags describing what kind of file that suffix names. */
struct st_zip_searchorder {
    char suffix[14];    /* suffix appended to the module path; 14 bytes
                           holds the longest entry, "/__init__.pyc",
                           plus its terminating NUL */
    int type;           /* IS_SOURCE or IS_BYTECODE, optionally or'ed
                           with IS_PACKAGE */
};
16
/* zip_searchorder defines how we search for a module in the Zip
   archive: we first search for a package __init__, then for
   non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
   are swapped by initzipimport() if we run in optimized mode. Also,
   '/' is replaced by SEP there.
   The table is terminated by an entry with an empty suffix; loops over
   it stop when they reach that sentinel. */
static struct st_zip_searchorder zip_searchorder[] = {
    {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.py", IS_PACKAGE | IS_SOURCE},
    {".pyc", IS_BYTECODE},
    {".pyo", IS_BYTECODE},
    {".py", IS_SOURCE},
    {"", 0}     /* sentinel: empty suffix ends the search */
};
31
32/* zipimporter object definition and support */
33
typedef struct _zipimporter ZipImporter;

/* Instance layout of zipimport.zipimporter objects. All three object
   fields are filled in by zipimporter_init() and released by
   zipimporter_dealloc(). */
struct _zipimporter {
    PyObject_HEAD
    PyObject *archive;  /* pathname of the Zip archive */
    PyObject *prefix;   /* file prefix: "a/sub/directory/" */
    PyObject *files;    /* dict with file info {path: toc_entry} */
};
42
/* Exception object raised for zipimport-specific failures. It is only
   used in this part of the file; where it is created is not visible here. */
static PyObject *ZipImportError;
/* Dict used by zipimporter_init() to map an archive path to the files
   dict produced by read_directory(), so each archive is scanned once. */
static PyObject *zip_directory_cache = NULL;

/* forward decls */
static PyObject *read_directory(char *archive);
static PyObject *get_data(char *archive, PyObject *toc_entry);
static PyObject *get_module_code(ZipImporter *self, char *fullname,
                                 int *p_ispackage, char **p_modpath);


/* True if op is an instance of ZipImporter_Type or of a subtype. */
#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
54
55
56/* zipimporter.__init__
57   Split the "subdirectory" from the Zip archive path, lookup a matching
58   entry in sys.path_importer_cache, fetch the file directory from there
59   if found, or else read it from the archive. */
60static int
61zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
62{
63    char *path, *p, *prefix, buf[MAXPATHLEN+2];
64    size_t len;
65
66    if (!_PyArg_NoKeywords("zipimporter()", kwds))
67        return -1;
68
69    if (!PyArg_ParseTuple(args, "s:zipimporter",
70                          &path))
71        return -1;
72
73    len = strlen(path);
74    if (len == 0) {
75        PyErr_SetString(ZipImportError, "archive path is empty");
76        return -1;
77    }
78    if (len >= MAXPATHLEN) {
79        PyErr_SetString(ZipImportError,
80                        "archive path too long");
81        return -1;
82    }
83    strcpy(buf, path);
84
85#ifdef ALTSEP
86    for (p = buf; *p; p++) {
87        if (*p == ALTSEP)
88            *p = SEP;
89    }
90#endif
91
92    path = NULL;
93    prefix = NULL;
94    for (;;) {
95#ifndef RISCOS
96        struct stat statbuf;
97        int rv;
98
99        rv = stat(buf, &statbuf);
100        if (rv == 0) {
101            /* it exists */
102            if (S_ISREG(statbuf.st_mode))
103                /* it's a file */
104                path = buf;
105            break;
106        }
107#else
108        if (object_exists(buf)) {
109            /* it exists */
110            if (isfile(buf))
111                /* it's a file */
112                path = buf;
113            break;
114        }
115#endif
116        /* back up one path element */
117        p = strrchr(buf, SEP);
118        if (prefix != NULL)
119            *prefix = SEP;
120        if (p == NULL)
121            break;
122        *p = '\0';
123        prefix = p;
124    }
125    if (path != NULL) {
126        PyObject *files;
127        files = PyDict_GetItemString(zip_directory_cache, path);
128        if (files == NULL) {
129            files = read_directory(buf);
130            if (files == NULL)
131                return -1;
132            if (PyDict_SetItemString(zip_directory_cache, path,
133                                     files) != 0)
134                return -1;
135        }
136        else
137            Py_INCREF(files);
138        self->files = files;
139    }
140    else {
141        PyErr_SetString(ZipImportError, "not a Zip file");
142        return -1;
143    }
144
145    if (prefix == NULL)
146        prefix = "";
147    else {
148        prefix++;
149        len = strlen(prefix);
150        if (prefix[len-1] != SEP) {
151            /* add trailing SEP */
152            prefix[len] = SEP;
153            prefix[len + 1] = '\0';
154        }
155    }
156
157    self->archive = PyString_FromString(buf);
158    if (self->archive == NULL)
159        return -1;
160
161    self->prefix = PyString_FromString(prefix);
162    if (self->prefix == NULL)
163        return -1;
164
165    return 0;
166}
167
168/* GC support. */
169static int
170zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
171{
172    ZipImporter *self = (ZipImporter *)obj;
173    Py_VISIT(self->files);
174    return 0;
175}
176
177static void
178zipimporter_dealloc(ZipImporter *self)
179{
180    PyObject_GC_UnTrack(self);
181    Py_XDECREF(self->archive);
182    Py_XDECREF(self->prefix);
183    Py_XDECREF(self->files);
184    Py_TYPE(self)->tp_free((PyObject *)self);
185}
186
187static PyObject *
188zipimporter_repr(ZipImporter *self)
189{
190    char buf[500];
191    char *archive = "???";
192    char *prefix = "";
193
194    if (self->archive != NULL && PyString_Check(self->archive))
195        archive = PyString_AsString(self->archive);
196    if (self->prefix != NULL && PyString_Check(self->prefix))
197        prefix = PyString_AsString(self->prefix);
198    if (prefix != NULL && *prefix)
199        PyOS_snprintf(buf, sizeof(buf),
200                      "<zipimporter object \"%.300s%c%.150s\">",
201                      archive, SEP, prefix);
202    else
203        PyOS_snprintf(buf, sizeof(buf),
204                      "<zipimporter object \"%.300s\">",
205                      archive);
206    return PyString_FromString(buf);
207}
208
/* Return fullname.split(".")[-1]: a pointer to the part of fullname
   that follows its last '.', or fullname itself when it has no dot.
   The result points into the caller's buffer; nothing is allocated. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
220
221/* Given a (sub)modulename, write the potential file path in the
222   archive (without extension) to the path buffer. Return the
223   length of the resulting string. */
224static int
225make_filename(char *prefix, char *name, char *path)
226{
227    size_t len;
228    char *p;
229
230    len = strlen(prefix);
231
232    /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
233    if (len + strlen(name) + 13 >= MAXPATHLEN) {
234        PyErr_SetString(ZipImportError, "path too long");
235        return -1;
236    }
237
238    strcpy(path, prefix);
239    strcpy(path + len, name);
240    for (p = path + len; *p; p++) {
241        if (*p == '.')
242            *p = SEP;
243    }
244    len += strlen(name);
245    assert(len < INT_MAX);
246    return (int)len;
247}
248
/* Result codes of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* lookup could not be performed; callers return NULL */
    MI_NOT_FOUND,   /* no entry in the search order matched */
    MI_MODULE,      /* matched a non-package entry */
    MI_PACKAGE      /* matched an entry flagged IS_PACKAGE */
};
255
256/* Return some information about a module. */
257static enum zi_module_info
258get_module_info(ZipImporter *self, char *fullname)
259{
260    char *subname, path[MAXPATHLEN + 1];
261    int len;
262    struct st_zip_searchorder *zso;
263
264    subname = get_subname(fullname);
265
266    len = make_filename(PyString_AsString(self->prefix), subname, path);
267    if (len < 0)
268        return MI_ERROR;
269
270    for (zso = zip_searchorder; *zso->suffix; zso++) {
271        strcpy(path + len, zso->suffix);
272        if (PyDict_GetItemString(self->files, path) != NULL) {
273            if (zso->type & IS_PACKAGE)
274                return MI_PACKAGE;
275            else
276                return MI_MODULE;
277        }
278    }
279    return MI_NOT_FOUND;
280}
281
282/* Check whether we can satisfy the import of the module named by
283   'fullname'. Return self if we can, None if we can't. */
284static PyObject *
285zipimporter_find_module(PyObject *obj, PyObject *args)
286{
287    ZipImporter *self = (ZipImporter *)obj;
288    PyObject *path = NULL;
289    char *fullname;
290    enum zi_module_info mi;
291
292    if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
293                          &fullname, &path))
294        return NULL;
295
296    mi = get_module_info(self, fullname);
297    if (mi == MI_ERROR)
298        return NULL;
299    if (mi == MI_NOT_FOUND) {
300        Py_INCREF(Py_None);
301        return Py_None;
302    }
303    Py_INCREF(self);
304    return (PyObject *)self;
305}
306
307/* Load and return the module named by 'fullname'. */
308static PyObject *
309zipimporter_load_module(PyObject *obj, PyObject *args)
310{
311    ZipImporter *self = (ZipImporter *)obj;
312    PyObject *code, *mod, *dict;
313    char *fullname, *modpath;
314    int ispackage;
315
316    if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
317                          &fullname))
318        return NULL;
319
320    code = get_module_code(self, fullname, &ispackage, &modpath);
321    if (code == NULL)
322        return NULL;
323
324    mod = PyImport_AddModule(fullname);
325    if (mod == NULL) {
326        Py_DECREF(code);
327        return NULL;
328    }
329    dict = PyModule_GetDict(mod);
330
331    /* mod.__loader__ = self */
332    if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
333        goto error;
334
335    if (ispackage) {
336        /* add __path__ to the module *before* the code gets
337           executed */
338        PyObject *pkgpath, *fullpath;
339        char *prefix = PyString_AsString(self->prefix);
340        char *subname = get_subname(fullname);
341        int err;
342
343        fullpath = PyString_FromFormat("%s%c%s%s",
344                                PyString_AsString(self->archive),
345                                SEP,
346                                *prefix ? prefix : "",
347                                subname);
348        if (fullpath == NULL)
349            goto error;
350
351        pkgpath = Py_BuildValue("[O]", fullpath);
352        Py_DECREF(fullpath);
353        if (pkgpath == NULL)
354            goto error;
355        err = PyDict_SetItemString(dict, "__path__", pkgpath);
356        Py_DECREF(pkgpath);
357        if (err != 0)
358            goto error;
359    }
360    mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
361    Py_DECREF(code);
362    if (Py_VerboseFlag)
363        PySys_WriteStderr("import %s # loaded from Zip %s\n",
364                          fullname, modpath);
365    return mod;
366error:
367    Py_DECREF(code);
368    Py_DECREF(mod);
369    return NULL;
370}
371
372/* Return a string matching __file__ for the named module */
373static PyObject *
374zipimporter_get_filename(PyObject *obj, PyObject *args)
375{
376    ZipImporter *self = (ZipImporter *)obj;
377    PyObject *code;
378    char *fullname, *modpath;
379    int ispackage;
380
381    if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename",
382                         &fullname))
383    return NULL;
384
385    /* Deciding the filename requires working out where the code
386       would come from if the module was actually loaded */
387    code = get_module_code(self, fullname, &ispackage, &modpath);
388    if (code == NULL)
389    return NULL;
390    Py_DECREF(code); /* Only need the path info */
391
392    return PyString_FromString(modpath);
393}
394
395/* Return a bool signifying whether the module is a package or not. */
396static PyObject *
397zipimporter_is_package(PyObject *obj, PyObject *args)
398{
399    ZipImporter *self = (ZipImporter *)obj;
400    char *fullname;
401    enum zi_module_info mi;
402
403    if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
404                          &fullname))
405        return NULL;
406
407    mi = get_module_info(self, fullname);
408    if (mi == MI_ERROR)
409        return NULL;
410    if (mi == MI_NOT_FOUND) {
411        PyErr_Format(ZipImportError, "can't find module '%.200s'",
412                     fullname);
413        return NULL;
414    }
415    return PyBool_FromLong(mi == MI_PACKAGE);
416}
417
418static PyObject *
419zipimporter_get_data(PyObject *obj, PyObject *args)
420{
421    ZipImporter *self = (ZipImporter *)obj;
422    char *path;
423#ifdef ALTSEP
424    char *p, buf[MAXPATHLEN + 1];
425#endif
426    PyObject *toc_entry;
427    Py_ssize_t len;
428
429    if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
430        return NULL;
431
432#ifdef ALTSEP
433    if (strlen(path) >= MAXPATHLEN) {
434        PyErr_SetString(ZipImportError, "path too long");
435        return NULL;
436    }
437    strcpy(buf, path);
438    for (p = buf; *p; p++) {
439        if (*p == ALTSEP)
440            *p = SEP;
441    }
442    path = buf;
443#endif
444    len = PyString_Size(self->archive);
445    if ((size_t)len < strlen(path) &&
446        strncmp(path, PyString_AsString(self->archive), len) == 0 &&
447        path[len] == SEP) {
448        path = path + len + 1;
449    }
450
451    toc_entry = PyDict_GetItemString(self->files, path);
452    if (toc_entry == NULL) {
453        PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
454        return NULL;
455    }
456    return get_data(PyString_AsString(self->archive), toc_entry);
457}
458
459static PyObject *
460zipimporter_get_code(PyObject *obj, PyObject *args)
461{
462    ZipImporter *self = (ZipImporter *)obj;
463    char *fullname;
464
465    if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
466        return NULL;
467
468    return get_module_code(self, fullname, NULL, NULL);
469}
470
471static PyObject *
472zipimporter_get_source(PyObject *obj, PyObject *args)
473{
474    ZipImporter *self = (ZipImporter *)obj;
475    PyObject *toc_entry;
476    char *fullname, *subname, path[MAXPATHLEN+1];
477    int len;
478    enum zi_module_info mi;
479
480    if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
481        return NULL;
482
483    mi = get_module_info(self, fullname);
484    if (mi == MI_ERROR)
485        return NULL;
486    if (mi == MI_NOT_FOUND) {
487        PyErr_Format(ZipImportError, "can't find module '%.200s'",
488                     fullname);
489        return NULL;
490    }
491    subname = get_subname(fullname);
492
493    len = make_filename(PyString_AsString(self->prefix), subname, path);
494    if (len < 0)
495        return NULL;
496
497    if (mi == MI_PACKAGE) {
498        path[len] = SEP;
499        strcpy(path + len + 1, "__init__.py");
500    }
501    else
502        strcpy(path + len, ".py");
503
504    toc_entry = PyDict_GetItemString(self->files, path);
505    if (toc_entry != NULL)
506        return get_data(PyString_AsString(self->archive), toc_entry);
507
508    /* we have the module, but no source */
509    Py_INCREF(Py_None);
510    return Py_None;
511}
512
/* Docstrings for the zipimporter methods; each is attached to its
   method through the zipimporter_methods table. */
PyDoc_STRVAR(doc_find_module,
"find_module(fullname, path=None) -> self or None.\n\
\n\
Search for a module specified by 'fullname'. 'fullname' must be the\n\
fully qualified (dotted) module name. It returns the zipimporter\n\
instance itself if the module was found, or None if it wasn't.\n\
The optional 'path' argument is ignored -- it's there for compatibility\n\
with the importer protocol.");

PyDoc_STRVAR(doc_load_module,
"load_module(fullname) -> module.\n\
\n\
Load the module specified by 'fullname'. 'fullname' must be the\n\
fully qualified (dotted) module name. It returns the imported\n\
module, or raises ZipImportError if it wasn't found.");

PyDoc_STRVAR(doc_get_data,
"get_data(pathname) -> string with file data.\n\
\n\
Return the data associated with 'pathname'. Raise IOError if\n\
the file wasn't found.");

PyDoc_STRVAR(doc_is_package,
"is_package(fullname) -> bool.\n\
\n\
Return True if the module specified by fullname is a package.\n\
Raise ZipImportError if the module couldn't be found.");

PyDoc_STRVAR(doc_get_code,
"get_code(fullname) -> code object.\n\
\n\
Return the code object for the specified module. Raise ZipImportError\n\
if the module couldn't be found.");

PyDoc_STRVAR(doc_get_source,
"get_source(fullname) -> source string.\n\
\n\
Return the source code for the specified module. Raise ZipImportError\n\
if the module couldn't be found, return None if the archive does\n\
contain the module, but has no source for it.");


PyDoc_STRVAR(doc_get_filename,
"get_filename(fullname) -> filename string.\n\
\n\
Return the filename for the specified module.");
559
/* Method table of the zipimporter type: Python-visible name, C
   implementation, calling convention and docstring.  Every method
   takes its arguments as a tuple (METH_VARARGS). */
static PyMethodDef zipimporter_methods[] = {
    {"find_module", zipimporter_find_module, METH_VARARGS,
     doc_find_module},
    {"load_module", zipimporter_load_module, METH_VARARGS,
     doc_load_module},
    {"get_data", zipimporter_get_data, METH_VARARGS,
     doc_get_data},
    {"get_code", zipimporter_get_code, METH_VARARGS,
     doc_get_code},
    {"get_source", zipimporter_get_source, METH_VARARGS,
     doc_get_source},
    {"get_filename", zipimporter_get_filename, METH_VARARGS,
     doc_get_filename},
    {"is_package", zipimporter_is_package, METH_VARARGS,
     doc_is_package},
    {NULL,              NULL}   /* sentinel */
};
577
/* Attribute table: exposes the three object fields of ZipImporter as
   attributes that cannot be rebound from Python (READONLY).  The files
   dict is published under the name "_files". */
static PyMemberDef zipimporter_members[] = {
    {"archive",  T_OBJECT, offsetof(ZipImporter, archive),  READONLY},
    {"prefix",   T_OBJECT, offsetof(ZipImporter, prefix),   READONLY},
    {"_files",   T_OBJECT, offsetof(ZipImporter, files),    READONLY},
    {NULL}      /* sentinel */
};
584
585PyDoc_STRVAR(zipimporter_doc,
586"zipimporter(archivepath) -> zipimporter object\n\
587\n\
588Create a new zipimporter instance. 'archivepath' must be a path to\n\
589a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
590'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
591valid directory inside the archive.\n\
592\n\
593'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
594archive.\n\
595\n\
596The 'archive' attribute of zipimporter objects contains the name of the\n\
597zipfile targeted.");
598
/* Expands to 0 whatever its argument, so the type object below starts
   out with a null type pointer in its header.
   NOTE(review): presumably the real address (&PyType_Type) is stored at
   module initialisation, which is not visible here -- confirm. */
#define DEFERRED_ADDRESS(ADDR) 0

/* Type object for zipimport.zipimporter.  The type can be subclassed
   (Py_TPFLAGS_BASETYPE) and takes part in cyclic GC
   (Py_TPFLAGS_HAVE_GC), with zipimporter_traverse as its traverse
   function and no tp_clear. */
static PyTypeObject ZipImporter_Type = {
    PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
    "zipimport.zipimporter",
    sizeof(ZipImporter),
    0,                                          /* tp_itemsize */
    (destructor)zipimporter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    (reprfunc)zipimporter_repr,                 /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_HAVE_GC,                     /* tp_flags */
    zipimporter_doc,                            /* tp_doc */
    zipimporter_traverse,                       /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    zipimporter_methods,                        /* tp_methods */
    zipimporter_members,                        /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    (initproc)zipimporter_init,                 /* tp_init */
    PyType_GenericAlloc,                        /* tp_alloc */
    PyType_GenericNew,                          /* tp_new */
    PyObject_GC_Del,                            /* tp_free */
};
643
644
645/* implementation */
646
/* Decode the first 4 bytes of buf as a little-endian integer and
   return it as a long (buf[0] is the least significant byte). This
   partially reimplements marshal.c:r_long().  Where SIZEOF_LONG is
   greater than 4, bit 31 is propagated into the upper bits so the
   value reads as a signed 32-bit quantity. */
static long
get_long(unsigned char *buf) {
    long value = 0;
    int i;

    /* fold in the bytes from most to least significant */
    for (i = 3; i >= 0; i--)
        value = (value << 8) | (long)buf[i];
#if SIZEOF_LONG > 4
    /* Sign extension for 64-bit machines */
    value |= -(value & 0x80000000L);
#endif
    return value;
}
663
664/*
665   read_directory(archive) -> files dict (new reference)
666
667   Given a path to a Zip archive, build a dict, mapping file names
668   (local to the archive, using SEP as a separator) to toc entries.
669
670   A toc_entry is a tuple:
671
672       (__file__,      # value to use for __file__, available for all files
673    compress,      # compression kind; 0 for uncompressed
674    data_size,     # size of compressed data on disk
675    file_size,     # size of decompressed data
676    file_offset,   # offset of file header from start of archive
677    time,          # mod time of file (in dos format)
678    date,          # mod data of file (in dos format)
679    crc,           # crc checksum of the data
680       )
681
682   Directories can be recognized by the trailing SEP in the name,
683   data_size and file_offset are 0.
684*/
685static PyObject *
686read_directory(char *archive)
687{
688    PyObject *files = NULL;
689    FILE *fp;
690    long compress, crc, data_size, file_size, file_offset, date, time;
691    long header_offset, name_size, header_size, header_position;
692    long i, l, count;
693    size_t length;
694    char path[MAXPATHLEN + 5];
695    char name[MAXPATHLEN + 5];
696    char *p, endof_central_dir[22];
697    long arc_offset; /* offset from beginning of file to start of zip-archive */
698
699    if (strlen(archive) > MAXPATHLEN) {
700        PyErr_SetString(PyExc_OverflowError,
701                        "Zip path name is too long");
702        return NULL;
703    }
704    strcpy(path, archive);
705
706    fp = fopen(archive, "rb");
707    if (fp == NULL) {
708        PyErr_Format(ZipImportError, "can't open Zip file: "
709                     "'%.200s'", archive);
710        return NULL;
711    }
712    fseek(fp, -22, SEEK_END);
713    header_position = ftell(fp);
714    if (fread(endof_central_dir, 1, 22, fp) != 22) {
715        fclose(fp);
716        PyErr_Format(ZipImportError, "can't read Zip file: "
717                     "'%.200s'", archive);
718        return NULL;
719    }
720    if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
721        /* Bad: End of Central Dir signature */
722        fclose(fp);
723        PyErr_Format(ZipImportError, "not a Zip file: "
724                     "'%.200s'", archive);
725        return NULL;
726    }
727
728    header_size = get_long((unsigned char *)endof_central_dir + 12);
729    header_offset = get_long((unsigned char *)endof_central_dir + 16);
730    arc_offset = header_position - header_offset - header_size;
731    header_offset += arc_offset;
732
733    files = PyDict_New();
734    if (files == NULL)
735        goto error;
736
737    length = (long)strlen(path);
738    path[length] = SEP;
739
740    /* Start of Central Directory */
741    count = 0;
742    for (;;) {
743        PyObject *t;
744        int err;
745
746        fseek(fp, header_offset, 0);  /* Start of file header */
747        l = PyMarshal_ReadLongFromFile(fp);
748        if (l != 0x02014B50)
749            break;              /* Bad: Central Dir File Header */
750        fseek(fp, header_offset + 10, 0);
751        compress = PyMarshal_ReadShortFromFile(fp);
752        time = PyMarshal_ReadShortFromFile(fp);
753        date = PyMarshal_ReadShortFromFile(fp);
754        crc = PyMarshal_ReadLongFromFile(fp);
755        data_size = PyMarshal_ReadLongFromFile(fp);
756        file_size = PyMarshal_ReadLongFromFile(fp);
757        name_size = PyMarshal_ReadShortFromFile(fp);
758        header_size = 46 + name_size +
759           PyMarshal_ReadShortFromFile(fp) +
760           PyMarshal_ReadShortFromFile(fp);
761        fseek(fp, header_offset + 42, 0);
762        file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
763        if (name_size > MAXPATHLEN)
764            name_size = MAXPATHLEN;
765
766        p = name;
767        for (i = 0; i < name_size; i++) {
768            *p = (char)getc(fp);
769            if (*p == '/')
770                *p = SEP;
771            p++;
772        }
773        *p = 0;         /* Add terminating null byte */
774        header_offset += header_size;
775
776        strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
777
778        t = Py_BuildValue("siiiiiii", path, compress, data_size,
779                          file_size, file_offset, time, date, crc);
780        if (t == NULL)
781            goto error;
782        err = PyDict_SetItemString(files, name, t);
783        Py_DECREF(t);
784        if (err != 0)
785            goto error;
786        count++;
787    }
788    fclose(fp);
789    if (Py_VerboseFlag)
790        PySys_WriteStderr("# zipimport: found %ld names in %s\n",
791            count, archive);
792    return files;
793error:
794    fclose(fp);
795    Py_XDECREF(files);
796    return NULL;
797}
798
799/* Return the zlib.decompress function object, or NULL if zlib couldn't
800   be imported. The function is cached when found, so subsequent calls
801   don't import zlib again. */
802static PyObject *
803get_decompress_func(void)
804{
805    static int importing_zlib = 0;
806    PyObject *zlib;
807    PyObject *decompress;
808
809    if (importing_zlib != 0)
810        /* Someone has a zlib.py[co] in their Zip file;
811           let's avoid a stack overflow. */
812        return NULL;
813    importing_zlib = 1;
814    zlib = PyImport_ImportModuleNoBlock("zlib");
815    importing_zlib = 0;
816    if (zlib != NULL) {
817        decompress = PyObject_GetAttrString(zlib,
818                                            "decompress");
819        Py_DECREF(zlib);
820    }
821    else {
822        PyErr_Clear();
823        decompress = NULL;
824    }
825    if (Py_VerboseFlag)
826        PySys_WriteStderr("# zipimport: zlib %s\n",
827            zlib != NULL ? "available": "UNAVAILABLE");
828    return decompress;
829}
830
831/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
832   data as a new reference. */
833static PyObject *
834get_data(char *archive, PyObject *toc_entry)
835{
836    PyObject *raw_data, *data = NULL, *decompress;
837    char *buf;
838    FILE *fp;
839    int err;
840    Py_ssize_t bytes_read = 0;
841    long l;
842    char *datapath;
843    long compress, data_size, file_size, file_offset;
844    long time, date, crc;
845
846    if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
847                          &data_size, &file_size, &file_offset, &time,
848                          &date, &crc)) {
849        return NULL;
850    }
851
852    fp = fopen(archive, "rb");
853    if (!fp) {
854        PyErr_Format(PyExc_IOError,
855           "zipimport: can not open file %s", archive);
856        return NULL;
857    }
858
859    /* Check to make sure the local file header is correct */
860    fseek(fp, file_offset, 0);
861    l = PyMarshal_ReadLongFromFile(fp);
862    if (l != 0x04034B50) {
863        /* Bad: Local File Header */
864        PyErr_Format(ZipImportError,
865                     "bad local file header in %s",
866                     archive);
867        fclose(fp);
868        return NULL;
869    }
870    fseek(fp, file_offset + 26, 0);
871    l = 30 + PyMarshal_ReadShortFromFile(fp) +
872        PyMarshal_ReadShortFromFile(fp);        /* local header size */
873    file_offset += l;           /* Start of file data */
874
875    raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
876                                          data_size : data_size + 1);
877    if (raw_data == NULL) {
878        fclose(fp);
879        return NULL;
880    }
881    buf = PyString_AsString(raw_data);
882
883    err = fseek(fp, file_offset, 0);
884    if (err == 0)
885        bytes_read = fread(buf, 1, data_size, fp);
886    fclose(fp);
887    if (err || bytes_read != data_size) {
888        PyErr_SetString(PyExc_IOError,
889                        "zipimport: can't read data");
890        Py_DECREF(raw_data);
891        return NULL;
892    }
893
894    if (compress != 0) {
895        buf[data_size] = 'Z';  /* saw this in zipfile.py */
896        data_size++;
897    }
898    buf[data_size] = '\0';
899
900    if (compress == 0)  /* data is not compressed */
901        return raw_data;
902
903    /* Decompress with zlib */
904    decompress = get_decompress_func();
905    if (decompress == NULL) {
906        PyErr_SetString(ZipImportError,
907                        "can't decompress data; "
908                        "zlib not available");
909        goto error;
910    }
911    data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
912    Py_DECREF(decompress);
913error:
914    Py_DECREF(raw_data);
915    return data;
916}
917
/* Lenient date/time comparison function. The precision of the mtime
   in the archive is lower than the mtime stored in a .pyc (dostime
   only stores even seconds), so two times count as equal when they
   differ by at most one second. Returns 1 if so, 0 otherwise. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t gap = t1 - t2;

    return ((gap < 0) ? -gap : gap) <= 1;
}
930
931/* Given the contents of a .py[co] file in a buffer, unmarshal the data
932   and return the code object. Return None if it the magic word doesn't
933   match (we do this instead of raising an exception as we fall back
934   to .py if available and we don't want to mask other errors).
935   Returns a new reference. */
936static PyObject *
937unmarshal_code(char *pathname, PyObject *data, time_t mtime)
938{
939    PyObject *code;
940    char *buf = PyString_AsString(data);
941    Py_ssize_t size = PyString_Size(data);
942
943    if (size <= 9) {
944        PyErr_SetString(ZipImportError,
945                        "bad pyc data");
946        return NULL;
947    }
948
949    if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
950        if (Py_VerboseFlag)
951            PySys_WriteStderr("# %s has bad magic\n",
952                              pathname);
953        Py_INCREF(Py_None);
954        return Py_None;  /* signal caller to try alternative */
955    }
956
957    if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
958                                mtime)) {
959        if (Py_VerboseFlag)
960            PySys_WriteStderr("# %s has bad mtime\n",
961                              pathname);
962        Py_INCREF(Py_None);
963        return Py_None;  /* signal caller to try alternative */
964    }
965
966    code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
967    if (code == NULL)
968        return NULL;
969    if (!PyCode_Check(code)) {
970        Py_DECREF(code);
971        PyErr_Format(PyExc_TypeError,
972             "compiled module %.200s is not a code object",
973             pathname);
974        return NULL;
975    }
976    return code;
977}
978
979/* Replace any occurances of "\r\n?" in the input string with "\n".
980   This converts DOS and Mac line endings to Unix line endings.
981   Also append a trailing "\n" to be compatible with
982   PyParser_SimpleParseFile(). Returns a new reference. */
983static PyObject *
984normalize_line_endings(PyObject *source)
985{
986    char *buf, *q, *p = PyString_AsString(source);
987    PyObject *fixed_source;
988
989    if (!p)
990        return NULL;
991
992    /* one char extra for trailing \n and one for terminating \0 */
993    buf = (char *)PyMem_Malloc(PyString_Size(source) + 2);
994    if (buf == NULL) {
995        PyErr_SetString(PyExc_MemoryError,
996                        "zipimport: no memory to allocate "
997                        "source buffer");
998        return NULL;
999    }
1000    /* replace "\r\n?" by "\n" */
1001    for (q = buf; *p != '\0'; p++) {
1002        if (*p == '\r') {
1003            *q++ = '\n';
1004            if (*(p + 1) == '\n')
1005                p++;
1006        }
1007        else
1008            *q++ = *p;
1009    }
1010    *q++ = '\n';  /* add trailing \n */
1011    *q = '\0';
1012    fixed_source = PyString_FromString(buf);
1013    PyMem_Free(buf);
1014    return fixed_source;
1015}
1016
1017/* Given a string buffer containing Python source code, compile it
1018   return and return a code object as a new reference. */
1019static PyObject *
1020compile_source(char *pathname, PyObject *source)
1021{
1022    PyObject *code, *fixed_source;
1023
1024    fixed_source = normalize_line_endings(source);
1025    if (fixed_source == NULL)
1026        return NULL;
1027
1028    code = Py_CompileString(PyString_AsString(fixed_source), pathname,
1029                            Py_file_input);
1030    Py_DECREF(fixed_source);
1031    return code;
1032}
1033
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.
   'dostime' and 'dosdate' are the packed DOS time and date words; the
   fields are unpacked into a struct tm and handed to mktime(), so the
   result is interpreted as local time. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm fields;

    memset(&fields, 0, sizeof(fields));

    /* date word: bits 9-15 year (offset from 1980, tm_year counts
       from 1900), bits 5-8 month (1-based), bits 0-4 day of month */
    fields.tm_year = ((dosdate >> 9) & 0x7f) + 80;
    fields.tm_mon  = ((dosdate >> 5) & 0x0f) - 1;
    fields.tm_mday = dosdate & 0x1f;

    /* time word: bits 11-15 hour, bits 5-10 minute, bits 0-4 seconds
       divided by two */
    fields.tm_hour = (dostime >> 11) & 0x1f;
    fields.tm_min  = (dostime >> 5) & 0x3f;
    fields.tm_sec  = (dostime & 0x1f) * 2;

    /* let mktime() work out whether DST applies; it ignores
       tm_wday/tm_yday on input */
    fields.tm_isdst = -1;

    return mktime(&fields);
}
1053
1054/* Given a path to a .pyc or .pyo file in the archive, return the
1055   modification time of the matching .py file, or 0 if no source
1056   is available. */
1057static time_t
1058get_mtime_of_source(ZipImporter *self, char *path)
1059{
1060    PyObject *toc_entry;
1061    time_t mtime = 0;
1062    Py_ssize_t lastchar = strlen(path) - 1;
1063    char savechar = path[lastchar];
1064    path[lastchar] = '\0';  /* strip 'c' or 'o' from *.py[co] */
1065    toc_entry = PyDict_GetItemString(self->files, path);
1066    if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1067        PyTuple_Size(toc_entry) == 8) {
1068        /* fetch the time stamp of the .py file for comparison
1069           with an embedded pyc time stamp */
1070        int time, date;
1071        time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1072        date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1073        mtime = parse_dostime(time, date);
1074    }
1075    path[lastchar] = savechar;
1076    return mtime;
1077}
1078
1079/* Return the code object for the module named by 'fullname' from the
1080   Zip archive as a new reference. */
1081static PyObject *
1082get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1083                   time_t mtime, PyObject *toc_entry)
1084{
1085    PyObject *data, *code;
1086    char *modpath;
1087    char *archive = PyString_AsString(self->archive);
1088
1089    if (archive == NULL)
1090        return NULL;
1091
1092    data = get_data(archive, toc_entry);
1093    if (data == NULL)
1094        return NULL;
1095
1096    modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1097
1098    if (isbytecode) {
1099        code = unmarshal_code(modpath, data, mtime);
1100    }
1101    else {
1102        code = compile_source(modpath, data);
1103    }
1104    Py_DECREF(data);
1105    return code;
1106}
1107
1108/* Get the code object associated with the module specified by
1109   'fullname'. */
1110static PyObject *
1111get_module_code(ZipImporter *self, char *fullname,
1112                int *p_ispackage, char **p_modpath)
1113{
1114    PyObject *toc_entry;
1115    char *subname, path[MAXPATHLEN + 1];
1116    int len;
1117    struct st_zip_searchorder *zso;
1118
1119    subname = get_subname(fullname);
1120
1121    len = make_filename(PyString_AsString(self->prefix), subname, path);
1122    if (len < 0)
1123        return NULL;
1124
1125    for (zso = zip_searchorder; *zso->suffix; zso++) {
1126        PyObject *code = NULL;
1127
1128        strcpy(path + len, zso->suffix);
1129        if (Py_VerboseFlag > 1)
1130            PySys_WriteStderr("# trying %s%c%s\n",
1131                              PyString_AsString(self->archive),
1132                              SEP, path);
1133        toc_entry = PyDict_GetItemString(self->files, path);
1134        if (toc_entry != NULL) {
1135            time_t mtime = 0;
1136            int ispackage = zso->type & IS_PACKAGE;
1137            int isbytecode = zso->type & IS_BYTECODE;
1138
1139            if (isbytecode)
1140                mtime = get_mtime_of_source(self, path);
1141            if (p_ispackage != NULL)
1142                *p_ispackage = ispackage;
1143            code = get_code_from_data(self, ispackage,
1144                                      isbytecode, mtime,
1145                                      toc_entry);
1146            if (code == Py_None) {
1147                /* bad magic number or non-matching mtime
1148                   in byte code, try next */
1149                Py_DECREF(code);
1150                continue;
1151            }
1152            if (code != NULL && p_modpath != NULL)
1153                *p_modpath = PyString_AsString(
1154                    PyTuple_GetItem(toc_entry, 0));
1155            return code;
1156        }
1157    }
1158    PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1159    return NULL;
1160}
1161
1162
1163/* Module init */
1164
1165PyDoc_STRVAR(zipimport_doc,
1166"zipimport provides support for importing Python modules from Zip archives.\n\
1167\n\
1168This module exports three objects:\n\
1169- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1170- ZipImportError: exception raised by zipimporter objects. It's a\n\
1171  subclass of ImportError, so it can be caught as ImportError, too.\n\
1172- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1173  info dicts, as used in zipimporter._files.\n\
1174\n\
1175It is usually not needed to use the zipimport module explicitly; it is\n\
1176used by the builtin import mechanism for sys.path items that are paths\n\
1177to Zip archives.");
1178
1179PyMODINIT_FUNC
1180initzipimport(void)
1181{
1182    PyObject *mod;
1183
1184    if (PyType_Ready(&ZipImporter_Type) < 0)
1185        return;
1186
1187    /* Correct directory separator */
1188    zip_searchorder[0].suffix[0] = SEP;
1189    zip_searchorder[1].suffix[0] = SEP;
1190    zip_searchorder[2].suffix[0] = SEP;
1191    if (Py_OptimizeFlag) {
1192        /* Reverse *.pyc and *.pyo */
1193        struct st_zip_searchorder tmp;
1194        tmp = zip_searchorder[0];
1195        zip_searchorder[0] = zip_searchorder[1];
1196        zip_searchorder[1] = tmp;
1197        tmp = zip_searchorder[3];
1198        zip_searchorder[3] = zip_searchorder[4];
1199        zip_searchorder[4] = tmp;
1200    }
1201
1202    mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1203                         NULL, PYTHON_API_VERSION);
1204    if (mod == NULL)
1205        return;
1206
1207    ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1208                                        PyExc_ImportError, NULL);
1209    if (ZipImportError == NULL)
1210        return;
1211
1212    Py_INCREF(ZipImportError);
1213    if (PyModule_AddObject(mod, "ZipImportError",
1214                           ZipImportError) < 0)
1215        return;
1216
1217    Py_INCREF(&ZipImporter_Type);
1218    if (PyModule_AddObject(mod, "zipimporter",
1219                           (PyObject *)&ZipImporter_Type) < 0)
1220        return;
1221
1222    zip_directory_cache = PyDict_New();
1223    if (zip_directory_cache == NULL)
1224        return;
1225    Py_INCREF(zip_directory_cache);
1226    if (PyModule_AddObject(mod, "_zip_directory_cache",
1227                           zip_directory_cache) < 0)
1228        return;
1229}
1230