zipimport.c revision 679db4aa99352abc3c9d93dcfc30e772760a43da
1#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
5#include <time.h>
6
7
/* Flags describing the kind of archive entry a search-order suffix
   stands for; they are OR-ed together in st_zip_searchorder.type. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* One candidate file-name suffix plus the IS_* flags that apply when an
   archive entry with that suffix is found. */
struct st_zip_searchorder {
	char suffix[14];	/* "/__init__.pyc" is 13 chars + NUL */
	int type;		/* combination of IS_* flags */
};

/* zip_searchorder defines how we search for a module in the Zip
   archive: we first search for a package __init__, then for
   non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
   are swapped by initzipimport() if we run in optimized mode. Also,
   '/' is replaced by SEP there.
   The entry with the empty suffix terminates the table; the lookup
   loops stop when they reach it. */
static struct st_zip_searchorder zip_searchorder[] = {
	{"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
	{"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
	{"/__init__.py", IS_PACKAGE | IS_SOURCE},
	{".pyc", IS_BYTECODE},
	{".pyo", IS_BYTECODE},
	{".py", IS_SOURCE},
	{"", 0}
};
31
/* zipimporter object definition and support */

typedef struct _zipimporter ZipImporter;

/* Instance layout of zipimport.zipimporter.  The three object fields are
   filled in by zipimporter_init() and released in zipimporter_dealloc(). */
struct _zipimporter {
	PyObject_HEAD
	PyObject *archive;  /* pathname of the Zip archive */
	PyObject *prefix;   /* file prefix: "a/sub/directory/" */
	PyObject *files;    /* dict with file info {path: toc_entry} */
};

/* Exception class created in initzipimport(). */
static PyObject *ZipImportError;
/* Dict mapping an archive path to its files dict; created in
   initzipimport() and consulted by zipimporter_init(). */
static PyObject *zip_directory_cache = NULL;

/* forward decls */
static PyObject *read_directory(char *archive);
static PyObject *get_data(char *archive, PyObject *toc_entry);
static PyObject *get_module_code(ZipImporter *self, char *fullname,
				 int *p_ispackage, char **p_modpath);


/* True if op is a zipimporter instance or an instance of a subtype. */
#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
54
55
56/* zipimporter.__init__
57   Split the "subdirectory" from the Zip archive path, lookup a matching
58   entry in sys.path_importer_cache, fetch the file directory from there
59   if found, or else read it from the archive. */
60static int
61zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
62{
63	char *path, *p, *prefix, buf[MAXPATHLEN+2];
64	size_t len;
65
66	if (!_PyArg_NoKeywords("zipimporter()", kwds))
67		return -1;
68
69	if (!PyArg_ParseTuple(args, "s:zipimporter",
70			      &path))
71		return -1;
72
73	len = strlen(path);
74	if (len == 0) {
75		PyErr_SetString(ZipImportError, "archive path is empty");
76		return -1;
77	}
78	if (len >= MAXPATHLEN) {
79		PyErr_SetString(ZipImportError,
80				"archive path too long");
81		return -1;
82	}
83	strcpy(buf, path);
84
85#ifdef ALTSEP
86	for (p = buf; *p; p++) {
87		if (*p == ALTSEP)
88			*p = SEP;
89	}
90#endif
91
92	path = NULL;
93	prefix = NULL;
94	for (;;) {
95		struct stat statbuf;
96		int rv;
97
98		rv = stat(buf, &statbuf);
99		if (rv == 0) {
100			/* it exists */
101			if (S_ISREG(statbuf.st_mode))
102				/* it's a file */
103				path = buf;
104			break;
105		}
106		/* back up one path element */
107		p = strrchr(buf, SEP);
108		if (prefix != NULL)
109			*prefix = SEP;
110		if (p == NULL)
111			break;
112		*p = '\0';
113		prefix = p;
114	}
115	if (path != NULL) {
116		PyObject *files;
117		files = PyDict_GetItemString(zip_directory_cache, path);
118		if (files == NULL) {
119			files = read_directory(buf);
120			if (files == NULL)
121				return -1;
122			if (PyDict_SetItemString(zip_directory_cache, path,
123						 files) != 0)
124				return -1;
125		}
126		else
127			Py_INCREF(files);
128		self->files = files;
129	}
130	else {
131		PyErr_SetString(ZipImportError, "not a Zip file");
132		return -1;
133	}
134
135	if (prefix == NULL)
136		prefix = "";
137	else {
138		prefix++;
139		len = strlen(prefix);
140		if (prefix[len-1] != SEP) {
141			/* add trailing SEP */
142			prefix[len] = SEP;
143			prefix[len + 1] = '\0';
144		}
145	}
146
147	self->archive = PyUnicode_FromString(buf);
148	if (self->archive == NULL)
149		return -1;
150
151	self->prefix = PyUnicode_FromString(prefix);
152	if (self->prefix == NULL)
153		return -1;
154
155	return 0;
156}
157
158/* GC support. */
159static int
160zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
161{
162	ZipImporter *self = (ZipImporter *)obj;
163	Py_VISIT(self->files);
164	return 0;
165}
166
167static void
168zipimporter_dealloc(ZipImporter *self)
169{
170	PyObject_GC_UnTrack(self);
171	Py_XDECREF(self->archive);
172	Py_XDECREF(self->prefix);
173	Py_XDECREF(self->files);
174	Py_TYPE(self)->tp_free((PyObject *)self);
175}
176
177static PyObject *
178zipimporter_repr(ZipImporter *self)
179{
180	char *archive = "???";
181	char *prefix = "";
182
183	if (self->archive != NULL && PyUnicode_Check(self->archive))
184		archive = PyUnicode_AsString(self->archive);
185	if (self->prefix != NULL && PyUnicode_Check(self->prefix))
186		prefix = PyUnicode_AsString(self->prefix);
187	if (prefix != NULL && *prefix)
188		return PyUnicode_FromFormat("<zipimporter object \"%.300s%c%.150s\">",
189		                            archive, SEP, prefix);
190	else
191		return PyUnicode_FromFormat("<zipimporter object \"%.300s\">",
192		                            archive);
193}
194
/* Return the last dotted component of fullname, i.e. the equivalent of
   fullname.split(".")[-1].  The result points into fullname itself;
   nothing is copied. */
static char *
get_subname(char *fullname)
{
	char *last_dot = strrchr(fullname, '.');

	return (last_dot != NULL) ? last_dot + 1 : fullname;
}
206
207/* Given a (sub)modulename, write the potential file path in the
208   archive (without extension) to the path buffer. Return the
209   length of the resulting string. */
210static int
211make_filename(char *prefix, char *name, char *path)
212{
213	size_t len;
214	char *p;
215
216	len = strlen(prefix);
217
218	/* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
219	if (len + strlen(name) + 13 >= MAXPATHLEN) {
220		PyErr_SetString(ZipImportError, "path too long");
221		return -1;
222	}
223
224	strcpy(path, prefix);
225	strcpy(path + len, name);
226	for (p = path + len; *p; p++) {
227		if (*p == '.')
228			*p = SEP;
229	}
230	len += strlen(name);
231	assert(len < INT_MAX);
232	return (int)len;
233}
234
/* Result codes of get_module_info(). */
enum zi_module_info {
	MI_ERROR,	/* lookup failed; callers return NULL to Python */
	MI_NOT_FOUND,	/* no matching entry in the files dict */
	MI_MODULE,	/* matched a suffix without the IS_PACKAGE flag */
	MI_PACKAGE	/* matched a suffix with the IS_PACKAGE flag */
};
241
242/* Return some information about a module. */
243static enum zi_module_info
244get_module_info(ZipImporter *self, char *fullname)
245{
246	char *subname, path[MAXPATHLEN + 1];
247	int len;
248	struct st_zip_searchorder *zso;
249
250	subname = get_subname(fullname);
251
252	len = make_filename(PyUnicode_AsString(self->prefix), subname, path);
253	if (len < 0)
254		return MI_ERROR;
255
256	for (zso = zip_searchorder; *zso->suffix; zso++) {
257		strcpy(path + len, zso->suffix);
258		if (PyDict_GetItemString(self->files, path) != NULL) {
259			if (zso->type & IS_PACKAGE)
260				return MI_PACKAGE;
261			else
262				return MI_MODULE;
263		}
264	}
265	return MI_NOT_FOUND;
266}
267
268/* Check whether we can satisfy the import of the module named by
269   'fullname'. Return self if we can, None if we can't. */
270static PyObject *
271zipimporter_find_module(PyObject *obj, PyObject *args)
272{
273	ZipImporter *self = (ZipImporter *)obj;
274	PyObject *path = NULL;
275	char *fullname;
276	enum zi_module_info mi;
277
278	if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
279			      &fullname, &path))
280		return NULL;
281
282	mi = get_module_info(self, fullname);
283	if (mi == MI_ERROR)
284		return NULL;
285	if (mi == MI_NOT_FOUND) {
286		Py_INCREF(Py_None);
287		return Py_None;
288	}
289	Py_INCREF(self);
290	return (PyObject *)self;
291}
292
293/* Load and return the module named by 'fullname'. */
294static PyObject *
295zipimporter_load_module(PyObject *obj, PyObject *args)
296{
297	ZipImporter *self = (ZipImporter *)obj;
298	PyObject *code, *mod, *dict;
299	char *fullname, *modpath;
300	int ispackage;
301
302	if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
303			      &fullname))
304		return NULL;
305
306	code = get_module_code(self, fullname, &ispackage, &modpath);
307	if (code == NULL)
308		return NULL;
309
310	mod = PyImport_AddModule(fullname);
311	if (mod == NULL) {
312		Py_DECREF(code);
313		return NULL;
314	}
315	dict = PyModule_GetDict(mod);
316
317	/* mod.__loader__ = self */
318	if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
319		goto error;
320
321	if (ispackage) {
322		/* add __path__ to the module *before* the code gets
323		   executed */
324		PyObject *pkgpath, *fullpath;
325		char *prefix = PyUnicode_AsString(self->prefix);
326		char *subname = get_subname(fullname);
327		int err;
328
329		fullpath = PyUnicode_FromFormat("%s%c%s%s",
330					PyUnicode_AsString(self->archive),
331					SEP,
332					*prefix ? prefix : "",
333					subname);
334		if (fullpath == NULL)
335			goto error;
336
337		pkgpath = Py_BuildValue("[O]", fullpath);
338		Py_DECREF(fullpath);
339		if (pkgpath == NULL)
340			goto error;
341		err = PyDict_SetItemString(dict, "__path__", pkgpath);
342		Py_DECREF(pkgpath);
343		if (err != 0)
344			goto error;
345	}
346	mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
347	Py_DECREF(code);
348	if (Py_VerboseFlag)
349		PySys_WriteStderr("import %s # loaded from Zip %s\n",
350				  fullname, modpath);
351	return mod;
352error:
353	Py_DECREF(code);
354	Py_DECREF(mod);
355	return NULL;
356}
357
358/* Return a bool signifying whether the module is a package or not. */
359static PyObject *
360zipimporter_is_package(PyObject *obj, PyObject *args)
361{
362	ZipImporter *self = (ZipImporter *)obj;
363	char *fullname;
364	enum zi_module_info mi;
365
366	if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
367			      &fullname))
368		return NULL;
369
370	mi = get_module_info(self, fullname);
371	if (mi == MI_ERROR)
372		return NULL;
373	if (mi == MI_NOT_FOUND) {
374		PyErr_Format(ZipImportError, "can't find module '%.200s'",
375			     fullname);
376		return NULL;
377	}
378	return PyBool_FromLong(mi == MI_PACKAGE);
379}
380
381static PyObject *
382zipimporter_get_data(PyObject *obj, PyObject *args)
383{
384	ZipImporter *self = (ZipImporter *)obj;
385	char *path;
386#ifdef ALTSEP
387	char *p, buf[MAXPATHLEN + 1];
388#endif
389	PyObject *toc_entry;
390	Py_ssize_t len;
391
392	if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
393		return NULL;
394
395#ifdef ALTSEP
396	if (strlen(path) >= MAXPATHLEN) {
397		PyErr_SetString(ZipImportError, "path too long");
398		return NULL;
399	}
400	strcpy(buf, path);
401	for (p = buf; *p; p++) {
402		if (*p == ALTSEP)
403			*p = SEP;
404	}
405	path = buf;
406#endif
407	len = PyUnicode_GET_SIZE(self->archive);
408	if ((size_t)len < strlen(path) &&
409	    strncmp(path, PyUnicode_AsString(self->archive), len) == 0 &&
410	    path[len] == SEP) {
411		path = path + len + 1;
412	}
413
414	toc_entry = PyDict_GetItemString(self->files, path);
415	if (toc_entry == NULL) {
416		PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
417		return NULL;
418	}
419	return get_data(PyUnicode_AsString(self->archive), toc_entry);
420}
421
422static PyObject *
423zipimporter_get_code(PyObject *obj, PyObject *args)
424{
425	ZipImporter *self = (ZipImporter *)obj;
426	char *fullname;
427
428	if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
429		return NULL;
430
431	return get_module_code(self, fullname, NULL, NULL);
432}
433
434static PyObject *
435zipimporter_get_source(PyObject *obj, PyObject *args)
436{
437	ZipImporter *self = (ZipImporter *)obj;
438	PyObject *toc_entry;
439	char *fullname, *subname, path[MAXPATHLEN+1];
440	int len;
441	enum zi_module_info mi;
442
443	if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
444		return NULL;
445
446	mi = get_module_info(self, fullname);
447	if (mi == MI_ERROR)
448		return NULL;
449	if (mi == MI_NOT_FOUND) {
450		PyErr_Format(ZipImportError, "can't find module '%.200s'",
451			     fullname);
452		return NULL;
453	}
454	subname = get_subname(fullname);
455
456	len = make_filename(PyUnicode_AsString(self->prefix), subname, path);
457	if (len < 0)
458		return NULL;
459
460	if (mi == MI_PACKAGE) {
461		path[len] = SEP;
462		strcpy(path + len + 1, "__init__.py");
463	}
464	else
465		strcpy(path + len, ".py");
466
467	toc_entry = PyDict_GetItemString(self->files, path);
468	if (toc_entry != NULL) {
469		PyObject *bytes = get_data(PyUnicode_AsString(self->archive), toc_entry);
470		PyObject *res = PyUnicode_FromString(PyBytes_AsString(bytes));
471		Py_XDECREF(bytes);
472		return res;
473	}
474
475	/* we have the module, but no source */
476	Py_INCREF(Py_None);
477	return Py_None;
478}
479
480PyDoc_STRVAR(doc_find_module,
481"find_module(fullname, path=None) -> self or None.\n\
482\n\
483Search for a module specified by 'fullname'. 'fullname' must be the\n\
484fully qualified (dotted) module name. It returns the zipimporter\n\
485instance itself if the module was found, or None if it wasn't.\n\
486The optional 'path' argument is ignored -- it's there for compatibility\n\
487with the importer protocol.");
488
489PyDoc_STRVAR(doc_load_module,
490"load_module(fullname) -> module.\n\
491\n\
492Load the module specified by 'fullname'. 'fullname' must be the\n\
493fully qualified (dotted) module name. It returns the imported\n\
494module, or raises ZipImportError if it wasn't found.");
495
496PyDoc_STRVAR(doc_get_data,
497"get_data(pathname) -> string with file data.\n\
498\n\
499Return the data associated with 'pathname'. Raise IOError if\n\
500the file wasn't found.");
501
502PyDoc_STRVAR(doc_is_package,
503"is_package(fullname) -> bool.\n\
504\n\
505Return True if the module specified by fullname is a package.\n\
506Raise ZipImportError is the module couldn't be found.");
507
508PyDoc_STRVAR(doc_get_code,
509"get_code(fullname) -> code object.\n\
510\n\
511Return the code object for the specified module. Raise ZipImportError\n\
512is the module couldn't be found.");
513
514PyDoc_STRVAR(doc_get_source,
515"get_source(fullname) -> source string.\n\
516\n\
517Return the source code for the specified module. Raise ZipImportError\n\
518is the module couldn't be found, return None if the archive does\n\
519contain the module, but has no source for it.");
520
/* Method table of the zipimporter type: Python-visible name, C
   implementation, calling convention and docstring.  Every method
   takes its arguments as a positional tuple (METH_VARARGS). */
static PyMethodDef zipimporter_methods[] = {
	{"find_module", zipimporter_find_module, METH_VARARGS,
	 doc_find_module},
	{"load_module", zipimporter_load_module, METH_VARARGS,
	 doc_load_module},
	{"get_data", zipimporter_get_data, METH_VARARGS,
	 doc_get_data},
	{"get_code", zipimporter_get_code, METH_VARARGS,
	 doc_get_code},
	{"get_source", zipimporter_get_source, METH_VARARGS,
	 doc_get_source},
	{"is_package", zipimporter_is_package, METH_VARARGS,
	 doc_is_package},
	{NULL,		NULL}	/* sentinel */
};
536
/* Read-only attributes exposing the three object fields of the
   ZipImporter struct; the files dict is published as "_files". */
static PyMemberDef zipimporter_members[] = {
	{"archive",  T_OBJECT, offsetof(ZipImporter, archive),  READONLY},
	{"prefix",   T_OBJECT, offsetof(ZipImporter, prefix),   READONLY},
	{"_files",   T_OBJECT, offsetof(ZipImporter, files),    READONLY},
	{NULL}	/* sentinel */
};
543
/* Docstring of the zipimporter type (tp_doc below). */
PyDoc_STRVAR(zipimporter_doc,
"zipimporter(archivepath) -> zipimporter object\n\
\n\
Create a new zipimporter instance. 'archivepath' must be a path to\n\
a zipfile. ZipImportError is raised if 'archivepath' doesn't point to\n\
a valid Zip archive.");

/* Expands to 0 whatever its argument is; the argument only records
   which address is meant for the slot. */
#define DEFERRED_ADDRESS(ADDR) 0

/* Type object for zipimport.zipimporter.  It is a GC-tracked type that
   can be subclassed (Py_TPFLAGS_BASETYPE); it provides tp_traverse but
   no tp_clear.  PyType_Ready() is called on it in initzipimport(). */
static PyTypeObject ZipImporter_Type = {
	PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
	"zipimport.zipimporter",
	sizeof(ZipImporter),
	0,					/* tp_itemsize */
	(destructor)zipimporter_dealloc,	/* tp_dealloc */
	0,					/* tp_print */
	0,					/* tp_getattr */
	0,					/* tp_setattr */
	0,					/* tp_compare */
	(reprfunc)zipimporter_repr,		/* tp_repr */
	0,					/* tp_as_number */
	0,					/* tp_as_sequence */
	0,					/* tp_as_mapping */
	0,					/* tp_hash */
	0,					/* tp_call */
	0,					/* tp_str */
	PyObject_GenericGetAttr,		/* tp_getattro */
	0,					/* tp_setattro */
	0,					/* tp_as_buffer */
	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
		Py_TPFLAGS_HAVE_GC,		/* tp_flags */
	zipimporter_doc,			/* tp_doc */
	zipimporter_traverse,			/* tp_traverse */
	0,					/* tp_clear */
	0,					/* tp_richcompare */
	0,					/* tp_weaklistoffset */
	0,					/* tp_iter */
	0,					/* tp_iternext */
	zipimporter_methods,			/* tp_methods */
	zipimporter_members,			/* tp_members */
	0,					/* tp_getset */
	0,					/* tp_base */
	0,					/* tp_dict */
	0,					/* tp_descr_get */
	0,					/* tp_descr_set */
	0,					/* tp_dictoffset */
	(initproc)zipimporter_init,		/* tp_init */
	PyType_GenericAlloc,			/* tp_alloc */
	PyType_GenericNew,			/* tp_new */
	PyObject_GC_Del,			/* tp_free */
};
595
596
597/* implementation */
598
/* Decode the first 4 bytes of buf as a little-endian integer and
   return it as a long.  When SIZEOF_LONG is larger than 4 the value
   is sign-extended from bit 31.  This partially reimplements
   marshal.c:r_long() */
static long
get_long(unsigned char *buf) {
	long value = 0;
	int i;

	/* byte i contributes bits 8*i .. 8*i+7 */
	for (i = 0; i < 4; i++)
		value |= (long)buf[i] << (8 * i);
#if SIZEOF_LONG > 4
	/* Sign extension for 64-bit machines */
	value |= -(value & 0x80000000L);
#endif
	return value;
}
615
616/*
617   read_directory(archive) -> files dict (new reference)
618
619   Given a path to a Zip archive, build a dict, mapping file names
620   (local to the archive, using SEP as a separator) to toc entries.
621
622   A toc_entry is a tuple:
623
624       (__file__,      # value to use for __file__, available for all files
625        compress,      # compression kind; 0 for uncompressed
626        data_size,     # size of compressed data on disk
627        file_size,     # size of decompressed data
628        file_offset,   # offset of file header from start of archive
629        time,          # mod time of file (in dos format)
630        date,          # mod data of file (in dos format)
631        crc,           # crc checksum of the data
632       )
633
634   Directories can be recognized by the trailing SEP in the name,
635   data_size and file_offset are 0.
636*/
637static PyObject *
638read_directory(char *archive)
639{
640	PyObject *files = NULL;
641	FILE *fp;
642	long compress, crc, data_size, file_size, file_offset, date, time;
643	long header_offset, name_size, header_size, header_position;
644	long i, l, count;
645	size_t length;
646	char path[MAXPATHLEN + 5];
647	char name[MAXPATHLEN + 5];
648	char *p, endof_central_dir[22];
649	long arc_offset; /* offset from beginning of file to start of zip-archive */
650
651	if (strlen(archive) > MAXPATHLEN) {
652		PyErr_SetString(PyExc_OverflowError,
653				"Zip path name is too long");
654		return NULL;
655	}
656	strcpy(path, archive);
657
658	fp = fopen(archive, "rb");
659	if (fp == NULL) {
660		PyErr_Format(ZipImportError, "can't open Zip file: "
661			     "'%.200s'", archive);
662		return NULL;
663	}
664	fseek(fp, -22, SEEK_END);
665	header_position = ftell(fp);
666	if (fread(endof_central_dir, 1, 22, fp) != 22) {
667		fclose(fp);
668		PyErr_Format(ZipImportError, "can't read Zip file: "
669			     "'%.200s'", archive);
670		return NULL;
671	}
672	if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
673		/* Bad: End of Central Dir signature */
674		fclose(fp);
675		PyErr_Format(ZipImportError, "not a Zip file: "
676			     "'%.200s'", archive);
677		return NULL;
678	}
679
680	header_size = get_long((unsigned char *)endof_central_dir + 12);
681	header_offset = get_long((unsigned char *)endof_central_dir + 16);
682	arc_offset = header_position - header_offset - header_size;
683	header_offset += arc_offset;
684
685	files = PyDict_New();
686	if (files == NULL)
687		goto error;
688
689	length = (long)strlen(path);
690	path[length] = SEP;
691
692	/* Start of Central Directory */
693	count = 0;
694	for (;;) {
695		PyObject *t;
696		int err;
697
698		fseek(fp, header_offset, 0);  /* Start of file header */
699		l = PyMarshal_ReadLongFromFile(fp);
700		if (l != 0x02014B50)
701			break;	/* Bad: Central Dir File Header */
702		fseek(fp, header_offset + 10, 0);
703		compress = PyMarshal_ReadShortFromFile(fp);
704		time = PyMarshal_ReadShortFromFile(fp);
705		date = PyMarshal_ReadShortFromFile(fp);
706		crc = PyMarshal_ReadLongFromFile(fp);
707		data_size = PyMarshal_ReadLongFromFile(fp);
708		file_size = PyMarshal_ReadLongFromFile(fp);
709		name_size = PyMarshal_ReadShortFromFile(fp);
710		header_size = 46 + name_size +
711		   PyMarshal_ReadShortFromFile(fp) +
712		   PyMarshal_ReadShortFromFile(fp);
713		fseek(fp, header_offset + 42, 0);
714		file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
715		if (name_size > MAXPATHLEN)
716			name_size = MAXPATHLEN;
717
718		p = name;
719		for (i = 0; i < name_size; i++) {
720			*p = (char)getc(fp);
721			if (*p == '/')
722				*p = SEP;
723			p++;
724		}
725		*p = 0;	/* Add terminating null byte */
726		header_offset += header_size;
727
728		strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
729
730		t = Py_BuildValue("siiiiiii", path, compress, data_size,
731				  file_size, file_offset, time, date, crc);
732		if (t == NULL)
733			goto error;
734		err = PyDict_SetItemString(files, name, t);
735		Py_DECREF(t);
736		if (err != 0)
737			goto error;
738		count++;
739	}
740	fclose(fp);
741	if (Py_VerboseFlag)
742		PySys_WriteStderr("# zipimport: found %ld names in %s\n",
743			count, archive);
744	return files;
745error:
746	fclose(fp);
747	Py_XDECREF(files);
748	return NULL;
749}
750
751/* Return the zlib.decompress function object, or NULL if zlib couldn't
752   be imported. The function is cached when found, so subsequent calls
753   don't import zlib again. Returns a *borrowed* reference.
754   XXX This makes zlib.decompress immortal. */
755static PyObject *
756get_decompress_func(void)
757{
758	static PyObject *decompress = NULL;
759
760	if (decompress == NULL) {
761		PyObject *zlib;
762		static int importing_zlib = 0;
763
764		if (importing_zlib != 0)
765			/* Someone has a zlib.py[co] in their Zip file;
766			   let's avoid a stack overflow. */
767			return NULL;
768		importing_zlib = 1;
769		zlib = PyImport_ImportModuleNoBlock("zlib");
770		importing_zlib = 0;
771		if (zlib != NULL) {
772			decompress = PyObject_GetAttrString(zlib,
773							    "decompress");
774			Py_DECREF(zlib);
775		}
776		else
777			PyErr_Clear();
778		if (Py_VerboseFlag)
779			PySys_WriteStderr("# zipimport: zlib %s\n",
780				zlib != NULL ? "available": "UNAVAILABLE");
781	}
782	return decompress;
783}
784
785/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
786   data as a new reference. */
787static PyObject *
788get_data(char *archive, PyObject *toc_entry)
789{
790	PyObject *raw_data, *data = NULL, *decompress;
791	char *buf;
792	FILE *fp;
793	int err;
794	Py_ssize_t bytes_read = 0;
795	long l;
796	char *datapath;
797	long compress, data_size, file_size, file_offset, bytes_size;
798	long time, date, crc;
799
800	if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
801			      &data_size, &file_size, &file_offset, &time,
802			      &date, &crc)) {
803		return NULL;
804	}
805
806	fp = fopen(archive, "rb");
807	if (!fp) {
808		PyErr_Format(PyExc_IOError,
809		   "zipimport: can not open file %s", archive);
810		return NULL;
811	}
812
813	/* Check to make sure the local file header is correct */
814	fseek(fp, file_offset, 0);
815	l = PyMarshal_ReadLongFromFile(fp);
816	if (l != 0x04034B50) {
817		/* Bad: Local File Header */
818		PyErr_Format(ZipImportError,
819			     "bad local file header in %s",
820			     archive);
821		fclose(fp);
822		return NULL;
823	}
824	fseek(fp, file_offset + 26, 0);
825	l = 30 + PyMarshal_ReadShortFromFile(fp) +
826	    PyMarshal_ReadShortFromFile(fp);	/* local header size */
827	file_offset += l;	/* Start of file data */
828
829	bytes_size = compress == 0 ? data_size : data_size + 1;
830	if (bytes_size == 0)
831		bytes_size++;
832	raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
833
834	if (raw_data == NULL) {
835		fclose(fp);
836		return NULL;
837	}
838	buf = PyBytes_AsString(raw_data);
839
840	err = fseek(fp, file_offset, 0);
841	if (err == 0)
842		bytes_read = fread(buf, 1, data_size, fp);
843	fclose(fp);
844	if (err || bytes_read != data_size) {
845		PyErr_SetString(PyExc_IOError,
846				"zipimport: can't read data");
847		Py_DECREF(raw_data);
848		return NULL;
849	}
850
851	if (compress != 0) {
852		buf[data_size] = 'Z';  /* saw this in zipfile.py */
853		data_size++;
854	}
855	buf[data_size] = '\0';
856
857	if (compress == 0) {  /* data is not compressed */
858		data = PyBytes_FromStringAndSize(buf, data_size);
859		Py_DECREF(raw_data);
860		return data;
861	}
862
863	/* Decompress with zlib */
864	decompress = get_decompress_func();
865	if (decompress == NULL) {
866		PyErr_SetString(ZipImportError,
867				"can't decompress data; "
868				"zlib not available");
869		goto error;
870	}
871	data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
872error:
873	Py_DECREF(raw_data);
874	return data;
875}
876
/* Lenient comparison of two time stamps: returns non-zero when they
   differ by at most one second, zero otherwise.  The mtime kept in
   the archive has a lower precision than the one stored in a .pyc
   (dostime only stores even seconds), hence the tolerance. */
static int
eq_mtime(time_t t1, time_t t2)
{
	time_t delta = t1 - t2;

	if (delta < 0)
		delta = -delta;
	return !(delta > 1);
}
889
890/* Given the contents of a .py[co] file in a buffer, unmarshal the data
891   and return the code object. Return None if it the magic word doesn't
892   match (we do this instead of raising an exception as we fall back
893   to .py if available and we don't want to mask other errors).
894   Returns a new reference. */
895static PyObject *
896unmarshal_code(char *pathname, PyObject *data, time_t mtime)
897{
898	PyObject *code;
899	char *buf = PyBytes_AsString(data);
900	Py_ssize_t size = PyBytes_Size(data);
901
902	if (size <= 9) {
903		PyErr_SetString(ZipImportError,
904				"bad pyc data");
905		return NULL;
906	}
907
908	if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
909		if (Py_VerboseFlag)
910			PySys_WriteStderr("# %s has bad magic\n",
911					  pathname);
912		Py_INCREF(Py_None);
913		return Py_None;  /* signal caller to try alternative */
914	}
915
916	if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
917				    mtime)) {
918		if (Py_VerboseFlag)
919			PySys_WriteStderr("# %s has bad mtime\n",
920					  pathname);
921		Py_INCREF(Py_None);
922		return Py_None;  /* signal caller to try alternative */
923	}
924
925	code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
926	if (code == NULL)
927		return NULL;
928	if (!PyCode_Check(code)) {
929		Py_DECREF(code);
930		PyErr_Format(PyExc_TypeError,
931		     "compiled module %.200s is not a code object",
932		     pathname);
933		return NULL;
934	}
935	return code;
936}
937
938/* Replace any occurances of "\r\n?" in the input string with "\n".
939   This converts DOS and Mac line endings to Unix line endings.
940   Also append a trailing "\n" to be compatible with
941   PyParser_SimpleParseFile(). Returns a new reference. */
942static PyObject *
943normalize_line_endings(PyObject *source)
944{
945	char *buf, *q, *p = PyBytes_AsString(source);
946	PyObject *fixed_source;
947	int len = 0;
948
949	if (!p) {
950		return PyBytes_FromStringAndSize("\n\0", 2);
951	}
952
953	/* one char extra for trailing \n and one for terminating \0 */
954	buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
955	if (buf == NULL) {
956		PyErr_SetString(PyExc_MemoryError,
957				"zipimport: no memory to allocate "
958				"source buffer");
959		return NULL;
960	}
961	/* replace "\r\n?" by "\n" */
962	for (q = buf; *p != '\0'; p++) {
963		if (*p == '\r') {
964			*q++ = '\n';
965			if (*(p + 1) == '\n')
966				p++;
967		}
968		else
969			*q++ = *p;
970		len++;
971	}
972	*q++ = '\n';  /* add trailing \n */
973	*q = '\0';
974	fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
975	PyMem_Free(buf);
976	return fixed_source;
977}
978
979/* Given a string buffer containing Python source code, compile it
980   return and return a code object as a new reference. */
981static PyObject *
982compile_source(char *pathname, PyObject *source)
983{
984	PyObject *code, *fixed_source;
985
986	fixed_source = normalize_line_endings(source);
987	if (fixed_source == NULL)
988		return NULL;
989
990	code = Py_CompileString(PyBytes_AsString(fixed_source), pathname,
991				Py_file_input);
992	Py_DECREF(fixed_source);
993	return code;
994}
995
/* Convert the DOS time and date words found in the Zip archive to a
   time_t that can be compared with the time stamp stored in .pyc
   files.  The bit fields are unpacked into a struct tm, which is then
   handed to mktime() with tm_isdst set to -1. */
static time_t
parse_dostime(int dostime, int dosdate)
{
	struct tm when;

	/* date word: year since 1980 (7 bits), month (4), day (5) */
	int year_field  = (dosdate >> 9) & 0x7f;
	int month_field = (dosdate >> 5) & 0x0f;
	int day_field   =  dosdate       & 0x1f;
	/* time word: hour (5 bits), minute (6), seconds / 2 (5) */
	int hour_field  = (dostime >> 11) & 0x1f;
	int min_field   = (dostime >> 5)  & 0x3f;
	int sec2_field  =  dostime        & 0x1f;

	memset((void *) &when, '\0', sizeof(when));

	when.tm_year  = year_field + 80;	/* tm_year counts from 1900 */
	when.tm_mon   = month_field - 1;	/* tm_mon is zero-based */
	when.tm_mday  = day_field;
	when.tm_hour  = hour_field;
	when.tm_min   = min_field;
	when.tm_sec   = sec2_field * 2;
	when.tm_isdst = -1; /* wday/yday is ignored */

	return mktime(&when);
}
1015
1016/* Given a path to a .pyc or .pyo file in the archive, return the
1017   modifictaion time of the matching .py file, or 0 if no source
1018   is available. */
1019static time_t
1020get_mtime_of_source(ZipImporter *self, char *path)
1021{
1022	PyObject *toc_entry;
1023	time_t mtime = 0;
1024	Py_ssize_t lastchar = strlen(path) - 1;
1025	char savechar = path[lastchar];
1026	path[lastchar] = '\0';  /* strip 'c' or 'o' from *.py[co] */
1027	toc_entry = PyDict_GetItemString(self->files, path);
1028	if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1029	    PyTuple_Size(toc_entry) == 8) {
1030		/* fetch the time stamp of the .py file for comparison
1031		   with an embedded pyc time stamp */
1032		int time, date;
1033		time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5));
1034		date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6));
1035		mtime = parse_dostime(time, date);
1036	}
1037	path[lastchar] = savechar;
1038	return mtime;
1039}
1040
1041/* Return the code object for the module named by 'fullname' from the
1042   Zip archive as a new reference. */
1043static PyObject *
1044get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1045		   time_t mtime, PyObject *toc_entry)
1046{
1047	PyObject *data, *code;
1048	char *modpath;
1049	char *archive = PyUnicode_AsString(self->archive);
1050
1051	if (archive == NULL)
1052		return NULL;
1053
1054	data = get_data(archive, toc_entry);
1055	if (data == NULL)
1056		return NULL;
1057
1058	modpath = PyUnicode_AsString(PyTuple_GetItem(toc_entry, 0));
1059
1060	if (isbytecode) {
1061		code = unmarshal_code(modpath, data, mtime);
1062	}
1063	else {
1064		code = compile_source(modpath, data);
1065	}
1066	Py_DECREF(data);
1067	return code;
1068}
1069
1070/* Get the code object assoiciated with the module specified by
1071   'fullname'. */
1072static PyObject *
1073get_module_code(ZipImporter *self, char *fullname,
1074		int *p_ispackage, char **p_modpath)
1075{
1076	PyObject *toc_entry;
1077	char *subname, path[MAXPATHLEN + 1];
1078	int len;
1079	struct st_zip_searchorder *zso;
1080
1081	subname = get_subname(fullname);
1082
1083	len = make_filename(PyUnicode_AsString(self->prefix), subname, path);
1084	if (len < 0)
1085		return NULL;
1086
1087	for (zso = zip_searchorder; *zso->suffix; zso++) {
1088		PyObject *code = NULL;
1089
1090		strcpy(path + len, zso->suffix);
1091		if (Py_VerboseFlag > 1)
1092			PySys_WriteStderr("# trying %s%c%s\n",
1093					  PyUnicode_AsString(self->archive),
1094					  SEP, path);
1095		toc_entry = PyDict_GetItemString(self->files, path);
1096		if (toc_entry != NULL) {
1097			time_t mtime = 0;
1098			int ispackage = zso->type & IS_PACKAGE;
1099			int isbytecode = zso->type & IS_BYTECODE;
1100
1101			if (isbytecode)
1102				mtime = get_mtime_of_source(self, path);
1103			if (p_ispackage != NULL)
1104				*p_ispackage = ispackage;
1105			code = get_code_from_data(self, ispackage,
1106						  isbytecode, mtime,
1107						  toc_entry);
1108			if (code == Py_None) {
1109				/* bad magic number or non-matching mtime
1110				   in byte code, try next */
1111				Py_DECREF(code);
1112				continue;
1113			}
1114			if (code != NULL && p_modpath != NULL)
1115				*p_modpath = PyUnicode_AsString(
1116					PyTuple_GetItem(toc_entry, 0));
1117			return code;
1118		}
1119	}
1120	PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1121	return NULL;
1122}
1123
1124
1125/* Module init */
1126
1127PyDoc_STRVAR(zipimport_doc,
1128"zipimport provides support for importing Python modules from Zip archives.\n\
1129\n\
1130This module exports three objects:\n\
1131- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1132- ZipImportError: exception raised by zipimporter objects. It's a\n\
1133  subclass of ImportError, so it can be caught as ImportError, too.\n\
1134- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1135  info dicts, as used in zipimporter._files.\n\
1136\n\
1137It is usually not needed to use the zipimport module explicitly; it is\n\
1138used by the builtin import mechanism for sys.path items that are paths\n\
1139to Zip archives.");
1140
1141PyMODINIT_FUNC
1142initzipimport(void)
1143{
1144	PyObject *mod;
1145
1146	if (PyType_Ready(&ZipImporter_Type) < 0)
1147		return;
1148
1149	/* Correct directory separator */
1150	zip_searchorder[0].suffix[0] = SEP;
1151	zip_searchorder[1].suffix[0] = SEP;
1152	zip_searchorder[2].suffix[0] = SEP;
1153	if (Py_OptimizeFlag) {
1154		/* Reverse *.pyc and *.pyo */
1155		struct st_zip_searchorder tmp;
1156		tmp = zip_searchorder[0];
1157		zip_searchorder[0] = zip_searchorder[1];
1158		zip_searchorder[1] = tmp;
1159		tmp = zip_searchorder[3];
1160		zip_searchorder[3] = zip_searchorder[4];
1161		zip_searchorder[4] = tmp;
1162	}
1163
1164	mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1165			     NULL, PYTHON_API_VERSION);
1166	if (mod == NULL)
1167		return;
1168
1169	ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1170					    PyExc_ImportError, NULL);
1171	if (ZipImportError == NULL)
1172		return;
1173
1174	Py_INCREF(ZipImportError);
1175	if (PyModule_AddObject(mod, "ZipImportError",
1176			       ZipImportError) < 0)
1177		return;
1178
1179	Py_INCREF(&ZipImporter_Type);
1180	if (PyModule_AddObject(mod, "zipimporter",
1181			       (PyObject *)&ZipImporter_Type) < 0)
1182		return;
1183
1184	zip_directory_cache = PyDict_New();
1185	if (zip_directory_cache == NULL)
1186		return;
1187	Py_INCREF(zip_directory_cache);
1188	if (PyModule_AddObject(mod, "_zip_directory_cache",
1189			       zip_directory_cache) < 0)
1190		return;
1191}
1192