zipimport.c revision f8b6de168bac1a002cf8931d5f601240d5c1b4d4
1#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
5#include "compile.h"
6#include <time.h>
7
8
/* Bit flags combined in st_zip_searchorder.type. */
#define IS_SOURCE   0
#define IS_BYTECODE (1 << 0)
#define IS_PACKAGE  (1 << 1)

/* One entry of the module search order: a file name suffix together
   with the IS_* flags describing a matching archive entry. */
struct st_zip_searchorder {
	char suffix[14];	/* fits "/__init__.pyc" plus the NUL */
	int type;		/* bitwise OR of IS_* flags */
};
17
18/* zip_searchorder defines how we search for a module in the Zip
19   archive: we first search for a package __init__, then for
20   non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
21   are swapped by initzipimport() if we run in optimized mode. Also,
22   '/' is replaced by SEP there. */
23struct st_zip_searchorder zip_searchorder[] = {
24	{"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
25	{"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
26	{"/__init__.py", IS_PACKAGE | IS_SOURCE},
27	{".pyc", IS_BYTECODE},
28	{".pyo", IS_BYTECODE},
29	{".py", IS_SOURCE},
30	{"", 0}
31};
32
33/* zipimporter object definition and support */
34
35typedef struct _zipimporter ZipImporter;
36
37struct _zipimporter {
38	PyObject_HEAD
39	PyObject *archive;  /* pathname of the Zip archive */
40	PyObject *prefix;   /* file prefix: "a/sub/directory/" */
41	PyObject *files;    /* dict with file info {path: toc_entry} */
42};
43
44static PyTypeObject ZipImporter_Type;
45static PyObject *ZipImportError;
46static PyObject *zip_directory_cache = NULL;
47
48/* forward decls */
49static PyObject *read_directory(char *archive);
50static PyObject *get_data(char *archive, PyObject *toc_entry);
51static PyObject *get_module_code(ZipImporter *self, char *fullname,
52				 int *p_ispackage, char **p_modpath);
53
54
55#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
56
57
58/* zipimporter.__init__
59   Split the "subdirectory" from the Zip archive path, lookup a matching
60   entry in sys.path_importer_cache, fetch the file directory from there
61   if found, or else read it from the archive. */
62static int
63zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
64{
65	char *path, *p, *prefix, buf[MAXPATHLEN+2];
66	int len;
67
68	if (!PyArg_ParseTuple(args, "s:zipimporter",
69			      &path))
70		return -1;
71
72	len = strlen(path);
73	if (len == 0) {
74		PyErr_SetString(ZipImportError, "archive path is empty");
75		return -1;
76	}
77	if (len >= MAXPATHLEN) {
78		PyErr_SetString(ZipImportError,
79				"archive path too long");
80		return -1;
81	}
82	strcpy(buf, path);
83
84#ifdef ALTSEP
85	for (p = buf; *p; p++) {
86		if (*p == ALTSEP)
87			*p = SEP;
88	}
89#endif
90
91	path = NULL;
92	prefix = NULL;
93	for (;;) {
94		struct stat statbuf;
95		int rv;
96
97		rv = stat(buf, &statbuf);
98		if (rv == 0) {
99			/* it exists */
100			if (S_ISREG(statbuf.st_mode))
101				/* it's a file */
102				path = buf;
103			break;
104		}
105		/* back up one path element */
106		p = strchr(buf, SEP);
107		if (prefix != NULL)
108			*prefix = SEP;
109		if (p == NULL)
110			break;
111		*p = '\0';
112		prefix = p;
113	}
114	if (path != NULL) {
115		PyObject *files;
116		files = PyDict_GetItemString(zip_directory_cache, path);
117		if (files == NULL) {
118			files = read_directory(buf);
119			if (files == NULL)
120				return -1;
121			if (PyDict_SetItemString(zip_directory_cache, path,
122						 files) != 0)
123				return -1;
124		}
125		else
126			Py_INCREF(files);
127		self->files = files;
128	}
129	else {
130		PyErr_SetString(ZipImportError, "not a Zip file");
131		return -1;
132	}
133
134	if (prefix == NULL)
135		prefix = "";
136	else {
137		prefix++;
138		len = strlen(prefix);
139		if (prefix[len-1] != SEP) {
140			/* add trailing SEP */
141			prefix[len] = SEP;
142			prefix[len + 1] = '\0';
143		}
144	}
145
146	self->archive = PyString_FromString(buf);
147	if (self->archive == NULL)
148		return -1;
149
150	self->prefix = PyString_FromString(prefix);
151	if (self->prefix == NULL)
152		return -1;
153
154	return 0;
155}
156
157/* GC support. */
158static int
159zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
160{
161	ZipImporter *self = (ZipImporter *)obj;
162	int err;
163
164	if (self->files != NULL) {
165		err = visit(self->files, arg);
166		if (err)
167			return err;
168	}
169	return 0;
170}
171
172static void
173zipimporter_dealloc(ZipImporter *self)
174{
175	PyObject_GC_UnTrack(self);
176	Py_XDECREF(self->archive);
177	Py_XDECREF(self->prefix);
178	Py_XDECREF(self->files);
179	self->ob_type->tp_free((PyObject *)self);
180}
181
182static PyObject *
183zipimporter_repr(ZipImporter *self)
184{
185	char buf[500];
186	char *archive = "???";
187	char *prefix = "";
188
189	if (self->archive != NULL && PyString_Check(self->archive))
190		archive = PyString_AsString(self->archive);
191	if (self->prefix != NULL && PyString_Check(self->prefix))
192		prefix = PyString_AsString(self->prefix);
193	if (prefix != NULL && *prefix)
194		PyOS_snprintf(buf, sizeof(buf),
195			      "<zipimporter object \"%.300s%c%.150s\">",
196			      archive, SEP, prefix);
197	else
198		PyOS_snprintf(buf, sizeof(buf),
199			      "<zipimporter object \"%.300s\">",
200			      archive);
201	return PyString_FromString(buf);
202}
203
/* Return the last component of a dotted module name, i.e.
   fullname.split(".")[-1]. The result points into 'fullname'. */
static char *
get_subname(char *fullname)
{
	char *last_dot = strrchr(fullname, '.');

	return last_dot != NULL ? last_dot + 1 : fullname;
}
215
216/* Given a (sub)modulename, write the potential file path in the
217   archive (without extension) to the path buffer. Return the
218   length of the resulting string. */
219static int
220make_filename(char *prefix, char *name, char *path)
221{
222	int len;
223	char *p;
224
225	len = strlen(prefix);
226
227	/* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
228	if (len + strlen(name) + 13 >= MAXPATHLEN) {
229		PyErr_SetString(ZipImportError, "path too long");
230		return -1;
231	}
232
233	strcpy(path, prefix);
234	strcpy(path + len, name);
235	for (p = path + len; *p; p++) {
236		if (*p == '.')
237			*p = SEP;
238	}
239	len += strlen(name);
240	return len;
241}
242
/* Result codes of get_module_info(). */
enum module_info {
	MI_ERROR     = 0,	/* an exception has been set */
	MI_NOT_FOUND = 1,	/* no matching entry in the archive */
	MI_MODULE    = 2,	/* found as a plain module */
	MI_PACKAGE   = 3	/* found as a package (__init__ entry) */
};
249
250/* Return some information about a module. */
251static enum module_info
252get_module_info(ZipImporter *self, char *fullname)
253{
254	char *subname, path[MAXPATHLEN + 1];
255	int len;
256	struct st_zip_searchorder *zso;
257
258	subname = get_subname(fullname);
259
260	len = make_filename(PyString_AsString(self->prefix), subname, path);
261	if (len < 0)
262		return MI_ERROR;
263
264	for (zso = zip_searchorder; *zso->suffix; zso++) {
265		strcpy(path + len, zso->suffix);
266		if (PyDict_GetItemString(self->files, path) != NULL) {
267			if (zso->type & IS_PACKAGE)
268				return MI_PACKAGE;
269			else
270				return MI_MODULE;
271		}
272	}
273	return MI_NOT_FOUND;
274}
275
276/* Check whether we can satisfy the import of the module named by
277   'fullname'. Return self if we can, None if we can't. */
278static PyObject *
279zipimporter_find_module(PyObject *obj, PyObject *args)
280{
281	ZipImporter *self = (ZipImporter *)obj;
282	PyObject *path = NULL;
283	char *fullname;
284	enum module_info mi;
285
286	if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
287			      &fullname, &path))
288		return NULL;
289
290	mi = get_module_info(self, fullname);
291	if (mi == MI_ERROR)
292		return NULL;
293	if (mi == MI_NOT_FOUND) {
294		Py_INCREF(Py_None);
295		return Py_None;
296	}
297	Py_INCREF(self);
298	return (PyObject *)self;
299}
300
301/* Load and return the module named by 'fullname'. */
302static PyObject *
303zipimporter_load_module(PyObject *obj, PyObject *args)
304{
305	ZipImporter *self = (ZipImporter *)obj;
306	PyObject *code, *mod, *dict;
307	char *fullname, *modpath;
308	int ispackage;
309
310	if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
311			      &fullname))
312		return NULL;
313
314	code = get_module_code(self, fullname, &ispackage, &modpath);
315	if (code == NULL)
316		return NULL;
317
318	mod = PyImport_AddModule(fullname);
319	if (mod == NULL) {
320		Py_DECREF(code);
321		return NULL;
322	}
323	dict = PyModule_GetDict(mod);
324
325	/* mod.__loader__ = self */
326	if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
327		goto error;
328
329	if (ispackage) {
330		/* add __path__ to the module *before* the code gets
331		   executed */
332		PyObject *pkgpath, *fullpath;
333		char *prefix = PyString_AsString(self->prefix);
334		char *subname = get_subname(fullname);
335		int err;
336
337		fullpath = PyString_FromFormat("%s%c%s%s",
338					PyString_AsString(self->archive),
339					SEP,
340					*prefix ? prefix : "",
341					subname);
342		if (fullpath == NULL)
343			goto error;
344
345		pkgpath = Py_BuildValue("[O]", fullpath);
346		Py_DECREF(fullpath);
347		if (pkgpath == NULL)
348			goto error;
349		err = PyDict_SetItemString(dict, "__path__", pkgpath);
350		Py_DECREF(pkgpath);
351		if (err != 0)
352			goto error;
353	}
354	mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
355	Py_DECREF(code);
356	if (Py_VerboseFlag)
357		PySys_WriteStderr("import %s # loaded from Zip %s\n",
358				  fullname, modpath);
359	return mod;
360error:
361	Py_DECREF(code);
362	Py_DECREF(mod);
363	return NULL;
364}
365
366/* Return a bool signifying whether the module is a package or not. */
367static PyObject *
368zipimporter_is_package(PyObject *obj, PyObject *args)
369{
370	ZipImporter *self = (ZipImporter *)obj;
371	char *fullname;
372	enum module_info mi;
373
374	if (!PyArg_ParseTuple(args, "s:zipimporter.find_module",
375			      &fullname))
376		return NULL;
377
378	mi = get_module_info(self, fullname);
379	if (mi == MI_ERROR)
380		return NULL;
381	if (mi == MI_NOT_FOUND) {
382		PyErr_Format(ZipImportError, "can't find module '%.200s'",
383			     fullname);
384		return NULL;
385	}
386	return PyBool_FromLong(mi == MI_PACKAGE);
387}
388
389static PyObject *
390zipimporter_get_data(PyObject *obj, PyObject *args)
391{
392	ZipImporter *self = (ZipImporter *)obj;
393	char *path;
394#ifdef ALTSEP
395	char *p, buf[MAXPATHLEN + 1];
396#endif
397	PyObject *toc_entry;
398	int len;
399
400	if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
401		return NULL;
402
403#ifdef ALTSEP
404	if (strlen(path) >= MAXPATHLEN) {
405		PyErr_SetString(ZipImportError, "path too long");
406		return NULL;
407	}
408	strcpy(buf, path);
409	for (p = buf; *p; p++) {
410		if (*p == ALTSEP)
411			*p = SEP;
412	}
413	path = buf;
414#endif
415	len = PyString_Size(self->archive);
416	if ((size_t)len < strlen(path) &&
417	    strncmp(path, PyString_AsString(self->archive), len) == 0 &&
418	    path[len] == SEP) {
419		path = path + len + 1;
420	}
421
422	toc_entry = PyDict_GetItemString(self->files, path);
423	if (toc_entry == NULL) {
424		PyErr_Format(PyExc_IOError, "file not found [%.200s]",
425			     path);
426		return NULL;
427	}
428	return get_data(PyString_AsString(self->archive), toc_entry);
429}
430
431static PyObject *
432zipimporter_get_code(PyObject *obj, PyObject *args)
433{
434	ZipImporter *self = (ZipImporter *)obj;
435	char *fullname;
436
437	if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
438		return NULL;
439
440	return get_module_code(self, fullname, NULL, NULL);
441}
442
443static PyObject *
444zipimporter_get_source(PyObject *obj, PyObject *args)
445{
446	ZipImporter *self = (ZipImporter *)obj;
447	PyObject *toc_entry;
448	char *fullname, *subname, path[MAXPATHLEN+1];
449	int len;
450	enum module_info mi;
451
452	if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
453		return NULL;
454
455	mi = get_module_info(self, fullname);
456	if (mi == MI_ERROR)
457		return NULL;
458	if (mi == MI_NOT_FOUND) {
459		PyErr_Format(ZipImportError, "can't find module '%.200s'",
460			     fullname);
461		return NULL;
462	}
463	subname = get_subname(fullname);
464
465	len = make_filename(PyString_AsString(self->prefix), subname, path);
466	if (len < 0)
467		return NULL;
468
469	if (mi == MI_PACKAGE) {
470		path[len] = SEP;
471		strcpy(path + len + 1, "__init__.py");
472	}
473	else
474		strcpy(path + len, ".py");
475
476	toc_entry = PyDict_GetItemString(self->files, path);
477	if (toc_entry != NULL)
478		return get_data(PyString_AsString(self->archive), toc_entry);
479
480	/* we have the module, but no source */
481	Py_INCREF(Py_None);
482	return Py_None;
483}
484
485PyDoc_STRVAR(doc_find_module,
486"find_module(fullname, path=None) -> self or None.\n\
487\n\
488Search for a module specified by 'fullname'. 'fullname' must be the\n\
489fully qualified (dotted) module name. It returns the zipimporter\n\
490instance itself if the module was found, or None if it wasn't.\n\
491The optional 'path' argument is ignored -- it's there for compatibility\n\
492with the importer protocol.");
493
494PyDoc_STRVAR(doc_load_module,
495"load_module(fullname) -> module.\n\
496\n\
497Load the module specified by 'fullname'. 'fullname' must be the\n\
498fully qualified (dotted) module name. It returns the imported\n\
499module, or raises ZipImportError if it wasn't found.");
500
501PyDoc_STRVAR(doc_get_data,
502"get_data(pathname) -> string with file data.\n\
503\n\
504Return the data associated with 'pathname'. Raise IOError if\n\
505the file wasn't found.");
506
507PyDoc_STRVAR(doc_is_package,
508"is_package(fullname) -> bool.\n\
509\n\
510Return True if the module specified by fullname is a package.\n\
511Raise ZipImportError is the module couldn't be found.");
512
513PyDoc_STRVAR(doc_get_code,
514"get_code(fullname) -> code object.\n\
515\n\
516Return the code object for the specified module. Raise ZipImportError\n\
517is the module couldn't be found.");
518
519PyDoc_STRVAR(doc_get_source,
520"get_source(fullname) -> source string.\n\
521\n\
522Return the source code for the specified module. Raise ZipImportError\n\
523is the module couldn't be found, return None if the archive does\n\
524contain the module, but has no source for it.");
525
526static PyMethodDef zipimporter_methods[] = {
527	{"find_module", zipimporter_find_module, METH_VARARGS,
528	 doc_find_module},
529	{"load_module", zipimporter_load_module, METH_VARARGS,
530	 doc_load_module},
531	{"get_data", zipimporter_get_data, METH_VARARGS,
532	 doc_get_data},
533	{"get_code", zipimporter_get_code, METH_VARARGS,
534	 doc_get_code},
535	{"get_source", zipimporter_get_source, METH_VARARGS,
536	 doc_get_source},
537	{"is_package", zipimporter_is_package, METH_VARARGS,
538	 doc_is_package},
539	{NULL,		NULL}	/* sentinel */
540};
541
542static PyMemberDef zipimporter_members[] = {
543	{"archive",  T_OBJECT, offsetof(ZipImporter, archive),  READONLY},
544	{"prefix",   T_OBJECT, offsetof(ZipImporter, prefix),   READONLY},
545	{"_files",   T_OBJECT, offsetof(ZipImporter, files),    READONLY},
546	{NULL}
547};
548
549PyDoc_STRVAR(zipimporter_doc,
550"zipimporter(archivepath) -> zipimporter object\n\
551\n\
552Create a new zipimporter instance. 'archivepath' must be a path to\n\
553a zipfile. ZipImportError is raised if 'archivepath' doesn't point to\n\
554a valid Zip archive.");
555
556#define DEFERRED_ADDRESS(ADDR) 0
557
558static PyTypeObject ZipImporter_Type = {
559	PyObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type))
560	0,
561	"zipimport.zipimporter",
562	sizeof(ZipImporter),
563	0,					/* tp_itemsize */
564	(destructor)zipimporter_dealloc,	/* tp_dealloc */
565	0,					/* tp_print */
566	0,					/* tp_getattr */
567	0,					/* tp_setattr */
568	0,					/* tp_compare */
569	(reprfunc)zipimporter_repr,		/* tp_repr */
570	0,					/* tp_as_number */
571	0,					/* tp_as_sequence */
572	0,					/* tp_as_mapping */
573	0,					/* tp_hash */
574	0,					/* tp_call */
575	0,					/* tp_str */
576	PyObject_GenericGetAttr,		/* tp_getattro */
577	0,					/* tp_setattro */
578	0,					/* tp_as_buffer */
579	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
580		Py_TPFLAGS_HAVE_GC,		/* tp_flags */
581	zipimporter_doc,			/* tp_doc */
582	zipimporter_traverse,			/* tp_traverse */
583	0,					/* tp_clear */
584	0,					/* tp_richcompare */
585	0,					/* tp_weaklistoffset */
586	0,					/* tp_iter */
587	0,					/* tp_iternext */
588	zipimporter_methods,			/* tp_methods */
589	zipimporter_members,			/* tp_members */
590	0,					/* tp_getset */
591	0,					/* tp_base */
592	0,					/* tp_dict */
593	0,					/* tp_descr_get */
594	0,					/* tp_descr_set */
595	0,					/* tp_dictoffset */
596	(initproc)zipimporter_init,		/* tp_init */
597	PyType_GenericAlloc,			/* tp_alloc */
598	PyType_GenericNew,			/* tp_new */
599	PyObject_GC_Del,			/* tp_free */
600};
601
602
603/* implementation */
604
/* Decode the first 2 bytes of 'buf' as a little endian, signed 16-bit
   value and return it as an int. This partially reimplements
   marshal.c:r_short(). */
static int
get_short(unsigned char *buf)
{
	int value = buf[0] | (buf[1] << 8);

	/* bit 15 is the sign bit of the 16-bit quantity */
	if (value & 0x8000)
		value -= 0x10000;
	return value;
}
618
/* Decode the first 4 bytes of 'buf' as a little endian 32-bit value
   and return it as a long. Where SIZEOF_LONG is greater than 4 the
   value is sign-extended from bit 31. This partially reimplements
   marshal.c:r_long().

   The bytes are combined in an unsigned long: shifting a set top bit
   into a 32-bit signed long would be undefined behaviour. */
static long
get_long(unsigned char *buf) {
	unsigned long x;

	x  = (unsigned long)buf[0];
	x |= (unsigned long)buf[1] <<  8;
	x |= (unsigned long)buf[2] << 16;
	x |= (unsigned long)buf[3] << 24;
#if SIZEOF_LONG > 4
	/* Sign extension for 64-bit machines */
	if (x & 0x80000000UL)
		x |= ~0xFFFFFFFFUL;
#endif
	return (long)x;
}
635
636/*
637   read_directory(archive) -> files dict (new reference)
638
639   Given a path to a Zip archive, build a dict, mapping file names
640   (local to the archive, using SEP as a separator) to toc entries.
641
642   A toc_entry is a tuple:
643
644       (compress,      # compression kind; 0 for uncompressed
645        data_size,     # size of compressed data on disk
646        file_size,     # size of decompressed data
647        file_offset,   # offset of file header from start of archive
648        time,          # mod time of file (in dos format)
649        date,          # mod data of file (in dos format)
650        crc,           # crc checksum of the data
651       )
652
653   Directories can be recognized by the trailing SEP in the name,
654   data_size and file_offset are 0.
655*/
656static PyObject *
657read_directory(char *archive)
658{
659	PyObject *files = NULL;
660	FILE *fp;
661	long compress, crc, data_size, file_size, file_offset, date, time;
662	long header_offset, name_size, header_size, header_end;
663	long i, l, length, count;
664	char path[MAXPATHLEN + 5];
665	char name[MAXPATHLEN + 5];
666	char *p, endof_central_dir[22];
667
668	if (strlen(archive) > MAXPATHLEN) {
669		PyErr_SetString(PyExc_OverflowError,
670				"Zip path name is too long");
671		return NULL;
672	}
673	strcpy(path, archive);
674
675	fp = fopen(archive, "rb");
676	if (fp == NULL) {
677		PyErr_Format(ZipImportError, "can't open Zip file: "
678			     "'%.200s'", archive);
679		return NULL;
680	}
681	fseek(fp, -22, 2);	/* Seek from end of file */
682	header_end = ftell(fp);
683	if (fread(endof_central_dir, 1, 22, fp) != 22) {
684		fclose(fp);
685		PyErr_Format(ZipImportError, "can't read Zip file: "
686			     "'%.200s'", archive);
687		return NULL;
688	}
689	if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
690		/* Bad: End of Central Dir signature */
691		fclose(fp);
692		PyErr_Format(ZipImportError, "not a Zip file: "
693			     "'%.200s'", archive);
694		return NULL;
695	}
696
697	header_offset = get_long((unsigned char *)endof_central_dir + 16);
698
699	files = PyDict_New();
700	if (files == NULL)
701		goto error;
702
703	length = (long)strlen(path);
704	path[length] = SEP;
705
706	/* Start of Central Directory */
707	count = 0;
708	for (;;) {
709		PyObject *t;
710		int err;
711
712		fseek(fp, header_offset, 0);  /* Start of file header */
713		l = PyMarshal_ReadLongFromFile(fp);
714		if (l != 0x02014B50)
715			break;	/* Bad: Central Dir File Header */
716		fseek(fp, header_offset + 10, 0);
717		compress = PyMarshal_ReadShortFromFile(fp);
718		time = PyMarshal_ReadShortFromFile(fp);
719		date = PyMarshal_ReadShortFromFile(fp);
720		crc = PyMarshal_ReadLongFromFile(fp);
721		data_size = PyMarshal_ReadLongFromFile(fp);
722		file_size = PyMarshal_ReadLongFromFile(fp);
723		name_size = PyMarshal_ReadShortFromFile(fp);
724		header_size = 46 + name_size +
725		   PyMarshal_ReadShortFromFile(fp) +
726		   PyMarshal_ReadShortFromFile(fp);
727		fseek(fp, header_offset + 42, 0);
728		file_offset = PyMarshal_ReadLongFromFile(fp);
729		if (name_size > MAXPATHLEN)
730			name_size = MAXPATHLEN;
731
732		p = name;
733		for (i = 0; i < name_size; i++) {
734			*p = (char)getc(fp);
735			if (*p == '/')
736				*p = SEP;
737			p++;
738		}
739		*p = 0;	/* Add terminating null byte */
740		header_offset += header_size;
741
742		strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
743
744		t = Py_BuildValue("siiiiiii", path, compress, data_size,
745				  file_size, file_offset, time, date, crc);
746		if (t == NULL)
747			goto error;
748		err = PyDict_SetItemString(files, name, t);
749		Py_DECREF(t);
750		if (err != 0)
751			goto error;
752		count++;
753	}
754	fclose(fp);
755	if (Py_VerboseFlag)
756		PySys_WriteStderr("# zipimport: found %ld names in %s\n",
757			count, archive);
758	return files;
759error:
760	fclose(fp);
761	Py_XDECREF(files);
762	return NULL;
763}
764
765/* Return the zlib.decompress function object, or NULL if zlib couldn't
766   be imported. The function is cached when found, so subsequent calls
767   don't import zlib again. Returns a *borrowed* reference.
768   XXX This makes zlib.decompress immortal. */
769static PyObject *
770get_decompress_func(void)
771{
772	static PyObject *decompress = NULL;
773
774	if (decompress == NULL) {
775		PyObject *zlib;
776		static int importing_zlib = 0;
777
778		if (importing_zlib != 0)
779			/* Someone has a zlib.py[co] in their Zip file;
780			   let's avoid a stack overflow. */
781			return NULL;
782		importing_zlib = 1;
783		zlib = PyImport_ImportModule("zlib");	/* import zlib */
784		importing_zlib = 0;
785		if (zlib != NULL) {
786			decompress = PyObject_GetAttrString(zlib,
787							    "decompress");
788			Py_DECREF(zlib);
789		}
790		else
791			PyErr_Clear();
792		if (Py_VerboseFlag)
793			PySys_WriteStderr("# zipimport: zlib %s\n",
794				zlib != NULL ? "available": "UNAVAILABLE");
795	}
796	return decompress;
797}
798
799/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
800   data as a new reference. */
801static PyObject *
802get_data(char *archive, PyObject *toc_entry)
803{
804	PyObject *raw_data, *data = NULL, *decompress;
805	char *buf;
806	FILE *fp;
807	int err, bytes_read = 0;
808	long l;
809	char *datapath;
810	long compress, data_size, file_size, file_offset;
811	long time, date, crc;
812
813	if (!PyArg_ParseTuple(toc_entry, "siiiiiii", &datapath, &compress,
814			      &data_size, &file_size, &file_offset, &time,
815			      &date, &crc)) {
816		return NULL;
817	}
818
819	fp = fopen(archive, "rb");
820	if (!fp) {
821		PyErr_Format(PyExc_IOError,
822		   "zipimport: can not open file %s", archive);
823		return NULL;
824	}
825
826	/* Check to make sure the local file header is correct */
827	fseek(fp, file_offset, 0);
828	l = PyMarshal_ReadLongFromFile(fp);
829	if (l != 0x04034B50) {
830		/* Bad: Local File Header */
831		PyErr_Format(ZipImportError,
832			     "bad local file header in %s",
833			     archive);
834		fclose(fp);
835		return NULL;
836	}
837	fseek(fp, file_offset + 26, 0);
838	l = 30 + PyMarshal_ReadShortFromFile(fp) +
839	    PyMarshal_ReadShortFromFile(fp);	/* local header size */
840	file_offset += l;	/* Start of file data */
841
842	raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
843					      data_size : data_size + 1);
844	if (raw_data == NULL) {
845		fclose(fp);
846		return NULL;
847	}
848	buf = PyString_AsString(raw_data);
849
850	err = fseek(fp, file_offset, 0);
851	if (err == 0)
852		bytes_read = fread(buf, 1, data_size, fp);
853	fclose(fp);
854	if (err || bytes_read != data_size) {
855		PyErr_SetString(PyExc_IOError,
856				"zipimport: can't read data");
857		Py_DECREF(raw_data);
858		return NULL;
859	}
860
861	if (compress != 0) {
862		buf[data_size] = 'Z';  /* saw this in zipfile.py */
863		data_size++;
864	}
865	buf[data_size] = '\0';
866
867	if (compress == 0)  /* data is not compressed */
868		return raw_data;
869
870	/* Decompress with zlib */
871	decompress = get_decompress_func();
872	if (decompress == NULL) {
873		PyErr_SetString(ZipImportError,
874				"can't decompress data; "
875				"zlib not available");
876		goto error;
877	}
878	data = PyObject_CallFunction(decompress, "Ol", raw_data, -15);
879error:
880	Py_DECREF(raw_data);
881	return data;
882}
883
/* Lenient date/time comparison function: return true if the two time
   stamps differ by at most one second. The mtime in the archive has
   a lower precision than the mtime stored in a .pyc (dostime only
   stores even seconds), so an exact comparison would be too strict. */
static int
eq_mtime(time_t t1, time_t t2)
{
	time_t gap = (t1 < t2) ? t2 - t1 : t1 - t2;

	return gap <= 1;
}
896
897/* Given the contents of a .py[co] file in a buffer, unmarshal the data
898   and return the code object. Return None if it the magic word doesn't
899   match (we do this instead of raising an exception as we fall back
900   to .py if available and we don't want to mask other errors).
901   Returns a new reference. */
902static PyObject *
903unmarshal_code(char *pathname, PyObject *data, time_t mtime)
904{
905	PyObject *code;
906	char *buf = PyString_AsString(data);
907	int size = PyString_Size(data);
908
909	if (size <= 9) {
910		PyErr_SetString(ZipImportError,
911				"bad pyc data");
912		return NULL;
913	}
914
915	if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
916		if (Py_VerboseFlag)
917			PySys_WriteStderr("# %s has bad magic\n",
918					  pathname);
919		Py_INCREF(Py_None);
920		return Py_None;  /* signal caller to try alternative */
921	}
922
923	if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4), mtime)) {
924		if (Py_VerboseFlag)
925			PySys_WriteStderr("# %s has bad mtime\n",
926					  pathname);
927		Py_INCREF(Py_None);
928		return Py_None;  /* signal caller to try alternative */
929	}
930
931	code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
932	if (code == NULL)
933		return NULL;
934	if (!PyCode_Check(code)) {
935		Py_DECREF(code);
936		PyErr_Format(PyExc_TypeError,
937		     "compiled module %.200s is not a code object",
938		     pathname);
939		return NULL;
940	}
941	return code;
942}
943
944/* Replace any occurances of "\r\n?" in the input string with "\n".
945   This converts DOS and Mac line endings to Unix line endings.
946   Also append a trailing "\n" to be compatible with
947   PyParser_SimpleParseFile(). Returns a new reference. */
948static PyObject *
949normalize_line_endings(PyObject *source)
950{
951	char *q, *p = PyString_AsString(source);
952	int length = PyString_Size(source) + 1;
953	PyObject *fixed_source;
954
955	fixed_source = PyString_FromStringAndSize(p, length);
956	if (fixed_source == NULL)
957		return NULL;
958
959	q = PyString_AsString(fixed_source);
960	/* replace "\r\n?" by "\n" */
961	for (;;) {
962		if (*p == '\r') {
963			*q++ = '\n';
964			if (*(p + 1) == '\n') {
965				p++;
966				length--;
967			}
968		}
969		else
970			*q++ = *p;
971		if (*p == '\0')
972			break;
973		p++;
974	}
975	*q++ = '\n';  /* add trailing \n */
976	*q = '\0';
977	_PyString_Resize(&fixed_source, length);
978	return fixed_source;
979}
980
981/* Given a string buffer containing Python source code, compile it
982   return and return a code object as a new reference. */
983static PyObject *
984compile_source(char *pathname, PyObject *source)
985{
986	PyObject *code, *fixed_source;
987
988	fixed_source = normalize_line_endings(source);
989	if (fixed_source == NULL)
990		return NULL;
991
992	code = Py_CompileString(PyString_AsString(fixed_source), pathname,
993				Py_file_input);
994	Py_DECREF(fixed_source);
995	return code;
996}
997
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   dostime: bits 0-4 seconds/2, bits 5-10 minutes, bits 11-15 hours.
   dosdate: bits 0-4 day, bits 5-8 month (1-12), bits 9-15 years
   since 1980.

   The fields are interpreted as local time by mktime(). tm_isdst is
   set to -1 so that mktime() itself determines whether daylight
   saving time is in effect for that date; forcing it to 0 gives a
   result that is one hour off while DST is active. */
time_t parse_dostime(int dostime, int dosdate)
{
	struct tm stm = {0};	/* no field is left uninitialised */

	stm.tm_sec   =  (dostime        & 0x1f) * 2;
	stm.tm_min   =  (dostime >> 5)  & 0x3f;
	stm.tm_hour  =  (dostime >> 11) & 0x1f;
	stm.tm_mday  =   dosdate        & 0x1f;
	stm.tm_mon   = ((dosdate >> 5)  & 0x0f) - 1;
	stm.tm_year  = ((dosdate >> 9)  & 0x7f) + 80;
	stm.tm_isdst =  -1; /* wday/yday is ignored */

	return mktime(&stm);
}
1014
1015/* Given a path to a .pyc or .pyo file in the archive, return the
1016   modifictaion time of the matching .py file, or 0 if no source
1017   is available. */
1018static time_t
1019get_mtime_of_source(ZipImporter *self, char *path)
1020{
1021	PyObject *toc_entry;
1022	time_t mtime = 0;
1023	int lastchar = strlen(path) - 1;
1024	char savechar = path[lastchar];
1025	path[lastchar] = '\0';  /* strip 'c' or 'o' from *.py[co] */
1026	toc_entry = PyDict_GetItemString(self->files, path);
1027	if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1028	    PyTuple_Size(toc_entry) == 8) {
1029		/* fetch the time stamp of the .py file for comparison
1030		   with an embedded pyc time stamp */
1031		int time, date;
1032		time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1033		date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1034		mtime = parse_dostime(time, date);
1035	}
1036	path[lastchar] = savechar;
1037	return mtime;
1038}
1039
1040/* Return the code object for the module named by 'fullname' from the
1041   Zip archive as a new reference. */
1042static PyObject *
1043get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1044		   time_t mtime, PyObject *toc_entry)
1045{
1046	PyObject *data, *code;
1047	char *modpath;
1048	char *archive = PyString_AsString(self->archive);
1049
1050	if (archive == NULL)
1051		return NULL;
1052
1053	data = get_data(archive, toc_entry);
1054	if (data == NULL)
1055		return NULL;
1056
1057	modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1058
1059	if (isbytecode) {
1060		code = unmarshal_code(modpath, data, mtime);
1061	}
1062	else {
1063		code = compile_source(modpath, data);
1064	}
1065	Py_DECREF(data);
1066	return code;
1067}
1068
1069/* Get the code object assoiciated with the module specified by
1070   'fullname'. */
1071static PyObject *
1072get_module_code(ZipImporter *self, char *fullname,
1073		int *p_ispackage, char **p_modpath)
1074{
1075	PyObject *toc_entry;
1076	char *subname, path[MAXPATHLEN + 1];
1077	int len;
1078	struct st_zip_searchorder *zso;
1079
1080	subname = get_subname(fullname);
1081
1082	len = make_filename(PyString_AsString(self->prefix), subname, path);
1083	if (len < 0)
1084		return NULL;
1085
1086	for (zso = zip_searchorder; *zso->suffix; zso++) {
1087		PyObject *code = NULL;
1088
1089		strcpy(path + len, zso->suffix);
1090		if (Py_VerboseFlag > 1)
1091			PySys_WriteStderr("# trying %s%c%s\n",
1092					  PyString_AsString(self->archive),
1093					  SEP, path);
1094		toc_entry = PyDict_GetItemString(self->files, path);
1095		if (toc_entry != NULL) {
1096			time_t mtime = 0;
1097			int ispackage = zso->type & IS_PACKAGE;
1098			int isbytecode = zso->type & IS_BYTECODE;
1099
1100			if (isbytecode)
1101				mtime = get_mtime_of_source(self, path);
1102			if (p_ispackage != NULL)
1103				*p_ispackage = ispackage;
1104			code = get_code_from_data(self, ispackage,
1105						  isbytecode, mtime,
1106						  toc_entry);
1107			if (code == Py_None) {
1108				/* bad magic number or non-matching mtime
1109				   in byte code, try next */
1110				Py_DECREF(code);
1111				continue;
1112			}
1113			if (code != NULL && p_modpath != NULL)
1114				*p_modpath = PyString_AsString(
1115					PyTuple_GetItem(toc_entry, 0));
1116			return code;
1117		}
1118	}
1119	PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1120	return NULL;
1121}
1122
1123
1124/* Module init */
1125
1126PyDoc_STRVAR(zipimport_doc,
1127"zipimport provides support for importing Python modules from Zip archives.\n\
1128\n\
1129This module exports three objects:\n\
1130- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1131- ZipImporterError: exception raised by zipimporter objects. It's a\n\
1132  subclass of ImportError, so it can be caught as ImportError, too.\n\
1133- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1134  info dicts, as used in zipimporter._files.\n\
1135\n\
1136It is usually not needed to use the zipimport module explicitly; it is\n\
1137used by the builtin import mechanism for sys.path items that are paths\n\
1138to Zip archives.");
1139
1140PyMODINIT_FUNC
1141initzipimport(void)
1142{
1143	PyObject *mod;
1144
1145	if (PyType_Ready(&ZipImporter_Type) < 0)
1146		return;
1147
1148	/* Correct directory separator */
1149	zip_searchorder[0].suffix[0] = SEP;
1150	zip_searchorder[1].suffix[0] = SEP;
1151	zip_searchorder[2].suffix[0] = SEP;
1152	if (Py_OptimizeFlag) {
1153		/* Reverse *.pyc and *.pyo */
1154		struct st_zip_searchorder tmp;
1155		tmp = zip_searchorder[0];
1156		zip_searchorder[0] = zip_searchorder[1];
1157		zip_searchorder[1] = tmp;
1158		tmp = zip_searchorder[3];
1159		zip_searchorder[3] = zip_searchorder[4];
1160		zip_searchorder[4] = tmp;
1161	}
1162
1163	mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1164			     NULL, PYTHON_API_VERSION);
1165
1166	ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1167					    PyExc_ImportError, NULL);
1168	if (ZipImportError == NULL)
1169		return;
1170
1171	Py_INCREF(ZipImportError);
1172	if (PyModule_AddObject(mod, "ZipImportError",
1173			       ZipImportError) < 0)
1174		return;
1175
1176	Py_INCREF(&ZipImporter_Type);
1177	if (PyModule_AddObject(mod, "zipimporter",
1178			       (PyObject *)&ZipImporter_Type) < 0)
1179		return;
1180
1181	zip_directory_cache = PyDict_New();
1182	if (zip_directory_cache == NULL)
1183		return;
1184	Py_INCREF(zip_directory_cache);
1185	if (PyModule_AddObject(mod, "_zip_directory_cache",
1186			       zip_directory_cache) < 0)
1187		return;
1188}
1189