zipimport.c revision 0c0aad948aa3bc3954514d842d28d08a6cc6112f
1#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
5#include "compile.h"
6#include <time.h>
7
8
/* Bit flags stored in st_zip_searchorder.type. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* One entry of the module search table: a file name suffix plus the
   IS_* flags that describe what kind of file that suffix denotes. */
struct st_zip_searchorder {
	char suffix[14];	/* longest suffix is 13 chars + NUL */
	int type;		/* bitwise OR of IS_* flags */
};
17
18/* zip_searchorder defines how we search for a module in the Zip
19   archive: we first search for a package __init__, then for
20   non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
21   are swapped by initzipimport() if we run in optimized mode. Also,
22   '/' is replaced by SEP there. */
23struct st_zip_searchorder zip_searchorder[] = {
24	{"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
25	{"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
26	{"/__init__.py", IS_PACKAGE | IS_SOURCE},
27	{".pyc", IS_BYTECODE},
28	{".pyo", IS_BYTECODE},
29	{".py", IS_SOURCE},
30	{"", 0}
31};
32
33/* zipimporter object definition and support */
34
35typedef struct _zipimporter ZipImporter;
36
37struct _zipimporter {
38	PyObject_HEAD
39	PyObject *archive;  /* pathname of the Zip archive */
40	PyObject *prefix;   /* file prefix: "a/sub/directory/" */
41	PyObject *files;    /* dict with file info {path: toc_entry} */
42};
43
44static PyTypeObject ZipImporter_Type;
45static PyObject *ZipImportError;
46static PyObject *zip_directory_cache = NULL;
47
48/* forward decls */
49static PyObject *read_directory(char *archive);
50static PyObject *get_data(char *archive, PyObject *toc_entry);
51static PyObject *get_module_code(ZipImporter *self, char *fullname,
52				 int *p_ispackage, char **p_modpath);
53
54
55#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
56
57
58/* zipimporter.__init__
59   Split the "subdirectory" from the Zip archive path, lookup a matching
60   entry in sys.path_importer_cache, fetch the file directory from there
61   if found, or else read it from the archive. */
62static int
63zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
64{
65	char *path, *p, *prefix, buf[MAXPATHLEN+2];
66	int len;
67
68	if (!PyArg_ParseTuple(args, "s:zipimporter",
69			      &path))
70		return -1;
71
72	len = strlen(path);
73	if (len == 0) {
74		PyErr_SetString(ZipImportError, "archive path is empty");
75		return -1;
76	}
77	if (len >= MAXPATHLEN) {
78		PyErr_SetString(ZipImportError,
79				"archive path too long");
80		return -1;
81	}
82	strcpy(buf, path);
83
84#ifdef ALTSEP
85	for (p = buf; *p; p++) {
86		if (*p == ALTSEP)
87			*p = SEP;
88	}
89#endif
90
91	path = NULL;
92	prefix = NULL;
93	for (;;) {
94		struct stat statbuf;
95		int rv;
96
97		rv = stat(buf, &statbuf);
98		if (rv == 0) {
99			/* it exists */
100			if (S_ISREG(statbuf.st_mode))
101				/* it's a file */
102				path = buf;
103			break;
104		}
105		/* back up one path element */
106		p = strrchr(buf, SEP);
107		if (prefix != NULL)
108			*prefix = SEP;
109		if (p == NULL)
110			break;
111		*p = '\0';
112		prefix = p;
113	}
114	if (path != NULL) {
115		PyObject *files;
116		files = PyDict_GetItemString(zip_directory_cache, path);
117		if (files == NULL) {
118			files = read_directory(buf);
119			if (files == NULL)
120				return -1;
121			if (PyDict_SetItemString(zip_directory_cache, path,
122						 files) != 0)
123				return -1;
124		}
125		else
126			Py_INCREF(files);
127		self->files = files;
128	}
129	else {
130		PyErr_SetString(ZipImportError, "not a Zip file");
131		return -1;
132	}
133
134	if (prefix == NULL)
135		prefix = "";
136	else {
137		prefix++;
138		len = strlen(prefix);
139		if (prefix[len-1] != SEP) {
140			/* add trailing SEP */
141			prefix[len] = SEP;
142			prefix[len + 1] = '\0';
143		}
144	}
145
146	self->archive = PyString_FromString(buf);
147	if (self->archive == NULL)
148		return -1;
149
150	self->prefix = PyString_FromString(prefix);
151	if (self->prefix == NULL)
152		return -1;
153
154	return 0;
155}
156
157/* GC support. */
158static int
159zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
160{
161	ZipImporter *self = (ZipImporter *)obj;
162	int err;
163
164	if (self->files != NULL) {
165		err = visit(self->files, arg);
166		if (err)
167			return err;
168	}
169	return 0;
170}
171
172static void
173zipimporter_dealloc(ZipImporter *self)
174{
175	PyObject_GC_UnTrack(self);
176	Py_XDECREF(self->archive);
177	Py_XDECREF(self->prefix);
178	Py_XDECREF(self->files);
179	self->ob_type->tp_free((PyObject *)self);
180}
181
182static PyObject *
183zipimporter_repr(ZipImporter *self)
184{
185	char buf[500];
186	char *archive = "???";
187	char *prefix = "";
188
189	if (self->archive != NULL && PyString_Check(self->archive))
190		archive = PyString_AsString(self->archive);
191	if (self->prefix != NULL && PyString_Check(self->prefix))
192		prefix = PyString_AsString(self->prefix);
193	if (prefix != NULL && *prefix)
194		PyOS_snprintf(buf, sizeof(buf),
195			      "<zipimporter object \"%.300s%c%.150s\">",
196			      archive, SEP, prefix);
197	else
198		PyOS_snprintf(buf, sizeof(buf),
199			      "<zipimporter object \"%.300s\">",
200			      archive);
201	return PyString_FromString(buf);
202}
203
/* Return a pointer to the last dotted component of 'fullname', i.e.
   fullname.split(".")[-1].  The result points into 'fullname'. */
static char *
get_subname(char *fullname)
{
	char *dot = strrchr(fullname, '.');

	return (dot != NULL) ? dot + 1 : fullname;
}
215
216/* Given a (sub)modulename, write the potential file path in the
217   archive (without extension) to the path buffer. Return the
218   length of the resulting string. */
219static int
220make_filename(char *prefix, char *name, char *path)
221{
222	int len;
223	char *p;
224
225	len = strlen(prefix);
226
227	/* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
228	if (len + strlen(name) + 13 >= MAXPATHLEN) {
229		PyErr_SetString(ZipImportError, "path too long");
230		return -1;
231	}
232
233	strcpy(path, prefix);
234	strcpy(path + len, name);
235	for (p = path + len; *p; p++) {
236		if (*p == '.')
237			*p = SEP;
238	}
239	len += strlen(name);
240	return len;
241}
242
/* Result of get_module_info(). */
enum module_info {
	MI_ERROR,	/* lookup failed, exception set */
	MI_NOT_FOUND,	/* no matching entry in the archive */
	MI_MODULE,	/* found, plain module */
	MI_PACKAGE	/* found, package (__init__ file) */
};
249
250/* Return some information about a module. */
251static enum module_info
252get_module_info(ZipImporter *self, char *fullname)
253{
254	char *subname, path[MAXPATHLEN + 1];
255	int len;
256	struct st_zip_searchorder *zso;
257
258	subname = get_subname(fullname);
259
260	len = make_filename(PyString_AsString(self->prefix), subname, path);
261	if (len < 0)
262		return MI_ERROR;
263
264	for (zso = zip_searchorder; *zso->suffix; zso++) {
265		strcpy(path + len, zso->suffix);
266		if (PyDict_GetItemString(self->files, path) != NULL) {
267			if (zso->type & IS_PACKAGE)
268				return MI_PACKAGE;
269			else
270				return MI_MODULE;
271		}
272	}
273	return MI_NOT_FOUND;
274}
275
276/* Check whether we can satisfy the import of the module named by
277   'fullname'. Return self if we can, None if we can't. */
278static PyObject *
279zipimporter_find_module(PyObject *obj, PyObject *args)
280{
281	ZipImporter *self = (ZipImporter *)obj;
282	PyObject *path = NULL;
283	char *fullname;
284	enum module_info mi;
285
286	if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
287			      &fullname, &path))
288		return NULL;
289
290	mi = get_module_info(self, fullname);
291	if (mi == MI_ERROR)
292		return NULL;
293	if (mi == MI_NOT_FOUND) {
294		Py_INCREF(Py_None);
295		return Py_None;
296	}
297	Py_INCREF(self);
298	return (PyObject *)self;
299}
300
301/* Load and return the module named by 'fullname'. */
302static PyObject *
303zipimporter_load_module(PyObject *obj, PyObject *args)
304{
305	ZipImporter *self = (ZipImporter *)obj;
306	PyObject *code, *mod, *dict;
307	char *fullname, *modpath;
308	int ispackage;
309
310	if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
311			      &fullname))
312		return NULL;
313
314	code = get_module_code(self, fullname, &ispackage, &modpath);
315	if (code == NULL)
316		return NULL;
317
318	mod = PyImport_AddModule(fullname);
319	if (mod == NULL) {
320		Py_DECREF(code);
321		return NULL;
322	}
323	dict = PyModule_GetDict(mod);
324
325	/* mod.__loader__ = self */
326	if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
327		goto error;
328
329	if (ispackage) {
330		/* add __path__ to the module *before* the code gets
331		   executed */
332		PyObject *pkgpath, *fullpath;
333		char *prefix = PyString_AsString(self->prefix);
334		char *subname = get_subname(fullname);
335		int err;
336
337		fullpath = PyString_FromFormat("%s%c%s%s",
338					PyString_AsString(self->archive),
339					SEP,
340					*prefix ? prefix : "",
341					subname);
342		if (fullpath == NULL)
343			goto error;
344
345		pkgpath = Py_BuildValue("[O]", fullpath);
346		Py_DECREF(fullpath);
347		if (pkgpath == NULL)
348			goto error;
349		err = PyDict_SetItemString(dict, "__path__", pkgpath);
350		Py_DECREF(pkgpath);
351		if (err != 0)
352			goto error;
353	}
354	mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
355	Py_DECREF(code);
356	if (Py_VerboseFlag)
357		PySys_WriteStderr("import %s # loaded from Zip %s\n",
358				  fullname, modpath);
359	return mod;
360error:
361	Py_DECREF(code);
362	Py_DECREF(mod);
363	return NULL;
364}
365
366/* Return a bool signifying whether the module is a package or not. */
367static PyObject *
368zipimporter_is_package(PyObject *obj, PyObject *args)
369{
370	ZipImporter *self = (ZipImporter *)obj;
371	char *fullname;
372	enum module_info mi;
373
374	if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
375			      &fullname))
376		return NULL;
377
378	mi = get_module_info(self, fullname);
379	if (mi == MI_ERROR)
380		return NULL;
381	if (mi == MI_NOT_FOUND) {
382		PyErr_Format(ZipImportError, "can't find module '%.200s'",
383			     fullname);
384		return NULL;
385	}
386	return PyBool_FromLong(mi == MI_PACKAGE);
387}
388
389static PyObject *
390zipimporter_get_data(PyObject *obj, PyObject *args)
391{
392	ZipImporter *self = (ZipImporter *)obj;
393	char *path;
394#ifdef ALTSEP
395	char *p, buf[MAXPATHLEN + 1];
396#endif
397	PyObject *toc_entry;
398	int len;
399
400	if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
401		return NULL;
402
403#ifdef ALTSEP
404	if (strlen(path) >= MAXPATHLEN) {
405		PyErr_SetString(ZipImportError, "path too long");
406		return NULL;
407	}
408	strcpy(buf, path);
409	for (p = buf; *p; p++) {
410		if (*p == ALTSEP)
411			*p = SEP;
412	}
413	path = buf;
414#endif
415	len = PyString_Size(self->archive);
416	if ((size_t)len < strlen(path) &&
417	    strncmp(path, PyString_AsString(self->archive), len) == 0 &&
418	    path[len] == SEP) {
419		path = path + len + 1;
420	}
421
422	toc_entry = PyDict_GetItemString(self->files, path);
423	if (toc_entry == NULL) {
424		PyErr_Format(PyExc_IOError, "file not found [%.200s]",
425			     path);
426		return NULL;
427	}
428	return get_data(PyString_AsString(self->archive), toc_entry);
429}
430
431static PyObject *
432zipimporter_get_code(PyObject *obj, PyObject *args)
433{
434	ZipImporter *self = (ZipImporter *)obj;
435	char *fullname;
436
437	if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
438		return NULL;
439
440	return get_module_code(self, fullname, NULL, NULL);
441}
442
443static PyObject *
444zipimporter_get_source(PyObject *obj, PyObject *args)
445{
446	ZipImporter *self = (ZipImporter *)obj;
447	PyObject *toc_entry;
448	char *fullname, *subname, path[MAXPATHLEN+1];
449	int len;
450	enum module_info mi;
451
452	if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
453		return NULL;
454
455	mi = get_module_info(self, fullname);
456	if (mi == MI_ERROR)
457		return NULL;
458	if (mi == MI_NOT_FOUND) {
459		PyErr_Format(ZipImportError, "can't find module '%.200s'",
460			     fullname);
461		return NULL;
462	}
463	subname = get_subname(fullname);
464
465	len = make_filename(PyString_AsString(self->prefix), subname, path);
466	if (len < 0)
467		return NULL;
468
469	if (mi == MI_PACKAGE) {
470		path[len] = SEP;
471		strcpy(path + len + 1, "__init__.py");
472	}
473	else
474		strcpy(path + len, ".py");
475
476	toc_entry = PyDict_GetItemString(self->files, path);
477	if (toc_entry != NULL)
478		return get_data(PyString_AsString(self->archive), toc_entry);
479
480	/* we have the module, but no source */
481	Py_INCREF(Py_None);
482	return Py_None;
483}
484
485PyDoc_STRVAR(doc_find_module,
486"find_module(fullname, path=None) -> self or None.\n\
487\n\
488Search for a module specified by 'fullname'. 'fullname' must be the\n\
489fully qualified (dotted) module name. It returns the zipimporter\n\
490instance itself if the module was found, or None if it wasn't.\n\
491The optional 'path' argument is ignored -- it's there for compatibility\n\
492with the importer protocol.");
493
494PyDoc_STRVAR(doc_load_module,
495"load_module(fullname) -> module.\n\
496\n\
497Load the module specified by 'fullname'. 'fullname' must be the\n\
498fully qualified (dotted) module name. It returns the imported\n\
499module, or raises ZipImportError if it wasn't found.");
500
501PyDoc_STRVAR(doc_get_data,
502"get_data(pathname) -> string with file data.\n\
503\n\
504Return the data associated with 'pathname'. Raise IOError if\n\
505the file wasn't found.");
506
507PyDoc_STRVAR(doc_is_package,
508"is_package(fullname) -> bool.\n\
509\n\
510Return True if the module specified by fullname is a package.\n\
511Raise ZipImportError is the module couldn't be found.");
512
513PyDoc_STRVAR(doc_get_code,
514"get_code(fullname) -> code object.\n\
515\n\
516Return the code object for the specified module. Raise ZipImportError\n\
517is the module couldn't be found.");
518
519PyDoc_STRVAR(doc_get_source,
520"get_source(fullname) -> source string.\n\
521\n\
522Return the source code for the specified module. Raise ZipImportError\n\
523is the module couldn't be found, return None if the archive does\n\
524contain the module, but has no source for it.");
525
526static PyMethodDef zipimporter_methods[] = {
527	{"find_module", zipimporter_find_module, METH_VARARGS,
528	 doc_find_module},
529	{"load_module", zipimporter_load_module, METH_VARARGS,
530	 doc_load_module},
531	{"get_data", zipimporter_get_data, METH_VARARGS,
532	 doc_get_data},
533	{"get_code", zipimporter_get_code, METH_VARARGS,
534	 doc_get_code},
535	{"get_source", zipimporter_get_source, METH_VARARGS,
536	 doc_get_source},
537	{"is_package", zipimporter_is_package, METH_VARARGS,
538	 doc_is_package},
539	{NULL,		NULL}	/* sentinel */
540};
541
542static PyMemberDef zipimporter_members[] = {
543	{"archive",  T_OBJECT, offsetof(ZipImporter, archive),  READONLY},
544	{"prefix",   T_OBJECT, offsetof(ZipImporter, prefix),   READONLY},
545	{"_files",   T_OBJECT, offsetof(ZipImporter, files),    READONLY},
546	{NULL}
547};
548
549PyDoc_STRVAR(zipimporter_doc,
550"zipimporter(archivepath) -> zipimporter object\n\
551\n\
552Create a new zipimporter instance. 'archivepath' must be a path to\n\
553a zipfile. ZipImportError is raised if 'archivepath' doesn't point to\n\
554a valid Zip archive.");
555
556#define DEFERRED_ADDRESS(ADDR) 0
557
558static PyTypeObject ZipImporter_Type = {
559	PyObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type))
560	0,
561	"zipimport.zipimporter",
562	sizeof(ZipImporter),
563	0,					/* tp_itemsize */
564	(destructor)zipimporter_dealloc,	/* tp_dealloc */
565	0,					/* tp_print */
566	0,					/* tp_getattr */
567	0,					/* tp_setattr */
568	0,					/* tp_compare */
569	(reprfunc)zipimporter_repr,		/* tp_repr */
570	0,					/* tp_as_number */
571	0,					/* tp_as_sequence */
572	0,					/* tp_as_mapping */
573	0,					/* tp_hash */
574	0,					/* tp_call */
575	0,					/* tp_str */
576	PyObject_GenericGetAttr,		/* tp_getattro */
577	0,					/* tp_setattro */
578	0,					/* tp_as_buffer */
579	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
580		Py_TPFLAGS_HAVE_GC,		/* tp_flags */
581	zipimporter_doc,			/* tp_doc */
582	zipimporter_traverse,			/* tp_traverse */
583	0,					/* tp_clear */
584	0,					/* tp_richcompare */
585	0,					/* tp_weaklistoffset */
586	0,					/* tp_iter */
587	0,					/* tp_iternext */
588	zipimporter_methods,			/* tp_methods */
589	zipimporter_members,			/* tp_members */
590	0,					/* tp_getset */
591	0,					/* tp_base */
592	0,					/* tp_dict */
593	0,					/* tp_descr_get */
594	0,					/* tp_descr_set */
595	0,					/* tp_dictoffset */
596	(initproc)zipimporter_init,		/* tp_init */
597	PyType_GenericAlloc,			/* tp_alloc */
598	PyType_GenericNew,			/* tp_new */
599	PyObject_GC_Del,			/* tp_free */
600};
601
602
603/* implementation */
604
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian and interpreted as a signed
   32-bit value.  This partially reimplements marshal.c:r_long().

   The bytes are assembled in an unsigned long and the sign is applied
   arithmetically afterwards, so no signed shift can overflow and the
   result is the same whatever the width of long. */
static long
get_long(unsigned char *buf) {
	unsigned long bits;
	long value;

	bits = (unsigned long)buf[0]
	     | ((unsigned long)buf[1] <<  8)
	     | ((unsigned long)buf[2] << 16)
	     | ((unsigned long)buf[3] << 24);

	/* low 31 bits are the magnitude part ... */
	value = (long)(bits & 0x7FFFFFFFUL);
	/* ... and bit 31 contributes -2**31, subtracted in two steps so
	   that the intermediate stays representable in a 32-bit long */
	if (bits & 0x80000000UL)
		value = value - 0x7FFFFFFFL - 1;
	return value;
}
621
622/*
623   read_directory(archive) -> files dict (new reference)
624
625   Given a path to a Zip archive, build a dict, mapping file names
626   (local to the archive, using SEP as a separator) to toc entries.
627
628   A toc_entry is a tuple:
629
630       (compress,      # compression kind; 0 for uncompressed
631        data_size,     # size of compressed data on disk
632        file_size,     # size of decompressed data
633        file_offset,   # offset of file header from start of archive
634        time,          # mod time of file (in dos format)
635        date,          # mod data of file (in dos format)
636        crc,           # crc checksum of the data
637       )
638
639   Directories can be recognized by the trailing SEP in the name,
640   data_size and file_offset are 0.
641*/
642static PyObject *
643read_directory(char *archive)
644{
645	PyObject *files = NULL;
646	FILE *fp;
647	long compress, crc, data_size, file_size, file_offset, date, time;
648	long header_offset, name_size, header_size, header_end;
649	long i, l, length, count;
650	char path[MAXPATHLEN + 5];
651	char name[MAXPATHLEN + 5];
652	char *p, endof_central_dir[22];
653
654	if (strlen(archive) > MAXPATHLEN) {
655		PyErr_SetString(PyExc_OverflowError,
656				"Zip path name is too long");
657		return NULL;
658	}
659	strcpy(path, archive);
660
661	fp = fopen(archive, "rb");
662	if (fp == NULL) {
663		PyErr_Format(ZipImportError, "can't open Zip file: "
664			     "'%.200s'", archive);
665		return NULL;
666	}
667	fseek(fp, -22, 2);	/* Seek from end of file */
668	header_end = ftell(fp);
669	if (fread(endof_central_dir, 1, 22, fp) != 22) {
670		fclose(fp);
671		PyErr_Format(ZipImportError, "can't read Zip file: "
672			     "'%.200s'", archive);
673		return NULL;
674	}
675	if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
676		/* Bad: End of Central Dir signature */
677		fclose(fp);
678		PyErr_Format(ZipImportError, "not a Zip file: "
679			     "'%.200s'", archive);
680		return NULL;
681	}
682
683	header_offset = get_long((unsigned char *)endof_central_dir + 16);
684
685	files = PyDict_New();
686	if (files == NULL)
687		goto error;
688
689	length = (long)strlen(path);
690	path[length] = SEP;
691
692	/* Start of Central Directory */
693	count = 0;
694	for (;;) {
695		PyObject *t;
696		int err;
697
698		fseek(fp, header_offset, 0);  /* Start of file header */
699		l = PyMarshal_ReadLongFromFile(fp);
700		if (l != 0x02014B50)
701			break;	/* Bad: Central Dir File Header */
702		fseek(fp, header_offset + 10, 0);
703		compress = PyMarshal_ReadShortFromFile(fp);
704		time = PyMarshal_ReadShortFromFile(fp);
705		date = PyMarshal_ReadShortFromFile(fp);
706		crc = PyMarshal_ReadLongFromFile(fp);
707		data_size = PyMarshal_ReadLongFromFile(fp);
708		file_size = PyMarshal_ReadLongFromFile(fp);
709		name_size = PyMarshal_ReadShortFromFile(fp);
710		header_size = 46 + name_size +
711		   PyMarshal_ReadShortFromFile(fp) +
712		   PyMarshal_ReadShortFromFile(fp);
713		fseek(fp, header_offset + 42, 0);
714		file_offset = PyMarshal_ReadLongFromFile(fp);
715		if (name_size > MAXPATHLEN)
716			name_size = MAXPATHLEN;
717
718		p = name;
719		for (i = 0; i < name_size; i++) {
720			*p = (char)getc(fp);
721			if (*p == '/')
722				*p = SEP;
723			p++;
724		}
725		*p = 0;	/* Add terminating null byte */
726		header_offset += header_size;
727
728		strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
729
730		t = Py_BuildValue("siiiiiii", path, compress, data_size,
731				  file_size, file_offset, time, date, crc);
732		if (t == NULL)
733			goto error;
734		err = PyDict_SetItemString(files, name, t);
735		Py_DECREF(t);
736		if (err != 0)
737			goto error;
738		count++;
739	}
740	fclose(fp);
741	if (Py_VerboseFlag)
742		PySys_WriteStderr("# zipimport: found %ld names in %s\n",
743			count, archive);
744	return files;
745error:
746	fclose(fp);
747	Py_XDECREF(files);
748	return NULL;
749}
750
751/* Return the zlib.decompress function object, or NULL if zlib couldn't
752   be imported. The function is cached when found, so subsequent calls
753   don't import zlib again. Returns a *borrowed* reference.
754   XXX This makes zlib.decompress immortal. */
755static PyObject *
756get_decompress_func(void)
757{
758	static PyObject *decompress = NULL;
759
760	if (decompress == NULL) {
761		PyObject *zlib;
762		static int importing_zlib = 0;
763
764		if (importing_zlib != 0)
765			/* Someone has a zlib.py[co] in their Zip file;
766			   let's avoid a stack overflow. */
767			return NULL;
768		importing_zlib = 1;
769		zlib = PyImport_ImportModule("zlib");	/* import zlib */
770		importing_zlib = 0;
771		if (zlib != NULL) {
772			decompress = PyObject_GetAttrString(zlib,
773							    "decompress");
774			Py_DECREF(zlib);
775		}
776		else
777			PyErr_Clear();
778		if (Py_VerboseFlag)
779			PySys_WriteStderr("# zipimport: zlib %s\n",
780				zlib != NULL ? "available": "UNAVAILABLE");
781	}
782	return decompress;
783}
784
785/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
786   data as a new reference. */
787static PyObject *
788get_data(char *archive, PyObject *toc_entry)
789{
790	PyObject *raw_data, *data = NULL, *decompress;
791	char *buf;
792	FILE *fp;
793	int err, bytes_read = 0;
794	long l;
795	char *datapath;
796	long compress, data_size, file_size, file_offset;
797	long time, date, crc;
798
799	if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
800			      &data_size, &file_size, &file_offset, &time,
801			      &date, &crc)) {
802		return NULL;
803	}
804
805	fp = fopen(archive, "rb");
806	if (!fp) {
807		PyErr_Format(PyExc_IOError,
808		   "zipimport: can not open file %s", archive);
809		return NULL;
810	}
811
812	/* Check to make sure the local file header is correct */
813	fseek(fp, file_offset, 0);
814	l = PyMarshal_ReadLongFromFile(fp);
815	if (l != 0x04034B50) {
816		/* Bad: Local File Header */
817		PyErr_Format(ZipImportError,
818			     "bad local file header in %s",
819			     archive);
820		fclose(fp);
821		return NULL;
822	}
823	fseek(fp, file_offset + 26, 0);
824	l = 30 + PyMarshal_ReadShortFromFile(fp) +
825	    PyMarshal_ReadShortFromFile(fp);	/* local header size */
826	file_offset += l;	/* Start of file data */
827
828	raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
829					      data_size : data_size + 1);
830	if (raw_data == NULL) {
831		fclose(fp);
832		return NULL;
833	}
834	buf = PyString_AsString(raw_data);
835
836	err = fseek(fp, file_offset, 0);
837	if (err == 0)
838		bytes_read = fread(buf, 1, data_size, fp);
839	fclose(fp);
840	if (err || bytes_read != data_size) {
841		PyErr_SetString(PyExc_IOError,
842				"zipimport: can't read data");
843		Py_DECREF(raw_data);
844		return NULL;
845	}
846
847	if (compress != 0) {
848		buf[data_size] = 'Z';  /* saw this in zipfile.py */
849		data_size++;
850	}
851	buf[data_size] = '\0';
852
853	if (compress == 0)  /* data is not compressed */
854		return raw_data;
855
856	/* Decompress with zlib */
857	decompress = get_decompress_func();
858	if (decompress == NULL) {
859		PyErr_SetString(ZipImportError,
860				"can't decompress data; "
861				"zlib not available");
862		goto error;
863	}
864	data = PyObject_CallFunction(decompress, "Ol", raw_data, -15);
865error:
866	Py_DECREF(raw_data);
867	return data;
868}
869
/* Lenient date/time comparison function. The precision of the mtime
   in the archive is lower than the mtime stored in a .pyc (dostime
   only stores even seconds), so two time stamps are treated as equal
   when they differ by at most one second.

   The difference is always taken as larger minus smaller, which
   gives the right answer whether time_t is signed or unsigned. */
static int
eq_mtime(time_t t1, time_t t2)
{
	time_t d = (t1 > t2) ? t1 - t2 : t2 - t1;

	return d <= 1;
}
882
883/* Given the contents of a .py[co] file in a buffer, unmarshal the data
884   and return the code object. Return None if it the magic word doesn't
885   match (we do this instead of raising an exception as we fall back
886   to .py if available and we don't want to mask other errors).
887   Returns a new reference. */
888static PyObject *
889unmarshal_code(char *pathname, PyObject *data, time_t mtime)
890{
891	PyObject *code;
892	char *buf = PyString_AsString(data);
893	int size = PyString_Size(data);
894
895	if (size <= 9) {
896		PyErr_SetString(ZipImportError,
897				"bad pyc data");
898		return NULL;
899	}
900
901	if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
902		if (Py_VerboseFlag)
903			PySys_WriteStderr("# %s has bad magic\n",
904					  pathname);
905		Py_INCREF(Py_None);
906		return Py_None;  /* signal caller to try alternative */
907	}
908
909	if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
910				    mtime)) {
911		if (Py_VerboseFlag)
912			PySys_WriteStderr("# %s has bad mtime\n",
913					  pathname);
914		Py_INCREF(Py_None);
915		return Py_None;  /* signal caller to try alternative */
916	}
917
918	code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
919	if (code == NULL)
920		return NULL;
921	if (!PyCode_Check(code)) {
922		Py_DECREF(code);
923		PyErr_Format(PyExc_TypeError,
924		     "compiled module %.200s is not a code object",
925		     pathname);
926		return NULL;
927	}
928	return code;
929}
930
931/* Replace any occurances of "\r\n?" in the input string with "\n".
932   This converts DOS and Mac line endings to Unix line endings.
933   Also append a trailing "\n" to be compatible with
934   PyParser_SimpleParseFile(). Returns a new reference. */
935static PyObject *
936normalize_line_endings(PyObject *source)
937{
938	char *buf, *q, *p = PyString_AsString(source);
939	PyObject *fixed_source;
940
941	/* one char extra for trailing \n and one for terminating \0 */
942	buf = PyMem_Malloc(PyString_Size(source) + 2);
943	if (buf == NULL) {
944		PyErr_SetString(PyExc_MemoryError,
945				"zipimport: no memory to allocate "
946				"source buffer");
947		return NULL;
948	}
949	/* replace "\r\n?" by "\n" */
950	for (q = buf; *p != '\0'; p++) {
951		if (*p == '\r') {
952			*q++ = '\n';
953			if (*(p + 1) == '\n')
954				p++;
955		}
956		else
957			*q++ = *p;
958	}
959	*q++ = '\n';  /* add trailing \n */
960	*q = '\0';
961	fixed_source = PyString_FromString(buf);
962	PyMem_Free(buf);
963	return fixed_source;
964}
965
966/* Given a string buffer containing Python source code, compile it
967   return and return a code object as a new reference. */
968static PyObject *
969compile_source(char *pathname, PyObject *source)
970{
971	PyObject *code, *fixed_source;
972
973	fixed_source = normalize_line_endings(source);
974	if (fixed_source == NULL)
975		return NULL;
976
977	code = Py_CompileString(PyString_AsString(fixed_source), pathname,
978				Py_file_input);
979	Py_DECREF(fixed_source);
980	return code;
981}
982
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   'dostime' packs hours (bits 11-15), minutes (bits 5-10) and
   seconds/2 (bits 0-4); 'dosdate' packs years since 1980 (bits 9-15),
   month 1-12 (bits 5-8) and day of month (bits 0-4).  The broken-down
   time is handed to mktime(), so it is interpreted as local time. */
time_t parse_dostime(int dostime, int dosdate)
{
	struct tm stm;

	/* struct tm may have members beyond the ones set below; do
	   not hand mktime() uninitialized memory */
	memset((void *) &stm, '\0', sizeof(stm));

	stm.tm_sec   =  (dostime        & 0x1f) * 2;
	stm.tm_min   =  (dostime >> 5)  & 0x3f;
	stm.tm_hour  =  (dostime >> 11) & 0x1f;
	stm.tm_mday  =   dosdate        & 0x1f;
	stm.tm_mon   = ((dosdate >> 5)  & 0x0f) - 1;
	stm.tm_year  = ((dosdate >> 9)  & 0x7f) + 80;
	/* -1 lets mktime() work out whether DST applies at that time;
	   forcing 0 would shift results by an hour during DST.
	   wday/yday is ignored */
	stm.tm_isdst =  -1;

	return mktime(&stm);
}
999
1000/* Given a path to a .pyc or .pyo file in the archive, return the
1001   modifictaion time of the matching .py file, or 0 if no source
1002   is available. */
1003static time_t
1004get_mtime_of_source(ZipImporter *self, char *path)
1005{
1006	PyObject *toc_entry;
1007	time_t mtime = 0;
1008	int lastchar = strlen(path) - 1;
1009	char savechar = path[lastchar];
1010	path[lastchar] = '\0';  /* strip 'c' or 'o' from *.py[co] */
1011	toc_entry = PyDict_GetItemString(self->files, path);
1012	if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1013	    PyTuple_Size(toc_entry) == 8) {
1014		/* fetch the time stamp of the .py file for comparison
1015		   with an embedded pyc time stamp */
1016		int time, date;
1017		time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1018		date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1019		mtime = parse_dostime(time, date);
1020	}
1021	path[lastchar] = savechar;
1022	return mtime;
1023}
1024
1025/* Return the code object for the module named by 'fullname' from the
1026   Zip archive as a new reference. */
1027static PyObject *
1028get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1029		   time_t mtime, PyObject *toc_entry)
1030{
1031	PyObject *data, *code;
1032	char *modpath;
1033	char *archive = PyString_AsString(self->archive);
1034
1035	if (archive == NULL)
1036		return NULL;
1037
1038	data = get_data(archive, toc_entry);
1039	if (data == NULL)
1040		return NULL;
1041
1042	modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1043
1044	if (isbytecode) {
1045		code = unmarshal_code(modpath, data, mtime);
1046	}
1047	else {
1048		code = compile_source(modpath, data);
1049	}
1050	Py_DECREF(data);
1051	return code;
1052}
1053
1054/* Get the code object assoiciated with the module specified by
1055   'fullname'. */
1056static PyObject *
1057get_module_code(ZipImporter *self, char *fullname,
1058		int *p_ispackage, char **p_modpath)
1059{
1060	PyObject *toc_entry;
1061	char *subname, path[MAXPATHLEN + 1];
1062	int len;
1063	struct st_zip_searchorder *zso;
1064
1065	subname = get_subname(fullname);
1066
1067	len = make_filename(PyString_AsString(self->prefix), subname, path);
1068	if (len < 0)
1069		return NULL;
1070
1071	for (zso = zip_searchorder; *zso->suffix; zso++) {
1072		PyObject *code = NULL;
1073
1074		strcpy(path + len, zso->suffix);
1075		if (Py_VerboseFlag > 1)
1076			PySys_WriteStderr("# trying %s%c%s\n",
1077					  PyString_AsString(self->archive),
1078					  SEP, path);
1079		toc_entry = PyDict_GetItemString(self->files, path);
1080		if (toc_entry != NULL) {
1081			time_t mtime = 0;
1082			int ispackage = zso->type & IS_PACKAGE;
1083			int isbytecode = zso->type & IS_BYTECODE;
1084
1085			if (isbytecode)
1086				mtime = get_mtime_of_source(self, path);
1087			if (p_ispackage != NULL)
1088				*p_ispackage = ispackage;
1089			code = get_code_from_data(self, ispackage,
1090						  isbytecode, mtime,
1091						  toc_entry);
1092			if (code == Py_None) {
1093				/* bad magic number or non-matching mtime
1094				   in byte code, try next */
1095				Py_DECREF(code);
1096				continue;
1097			}
1098			if (code != NULL && p_modpath != NULL)
1099				*p_modpath = PyString_AsString(
1100					PyTuple_GetItem(toc_entry, 0));
1101			return code;
1102		}
1103	}
1104	PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1105	return NULL;
1106}
1107
1108
1109/* Module init */
1110
1111PyDoc_STRVAR(zipimport_doc,
1112"zipimport provides support for importing Python modules from Zip archives.\n\
1113\n\
1114This module exports three objects:\n\
1115- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1116- ZipImporterError: exception raised by zipimporter objects. It's a\n\
1117  subclass of ImportError, so it can be caught as ImportError, too.\n\
1118- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1119  info dicts, as used in zipimporter._files.\n\
1120\n\
1121It is usually not needed to use the zipimport module explicitly; it is\n\
1122used by the builtin import mechanism for sys.path items that are paths\n\
1123to Zip archives.");
1124
1125PyMODINIT_FUNC
1126initzipimport(void)
1127{
1128	PyObject *mod;
1129
1130	if (PyType_Ready(&ZipImporter_Type) < 0)
1131		return;
1132
1133	/* Correct directory separator */
1134	zip_searchorder[0].suffix[0] = SEP;
1135	zip_searchorder[1].suffix[0] = SEP;
1136	zip_searchorder[2].suffix[0] = SEP;
1137	if (Py_OptimizeFlag) {
1138		/* Reverse *.pyc and *.pyo */
1139		struct st_zip_searchorder tmp;
1140		tmp = zip_searchorder[0];
1141		zip_searchorder[0] = zip_searchorder[1];
1142		zip_searchorder[1] = tmp;
1143		tmp = zip_searchorder[3];
1144		zip_searchorder[3] = zip_searchorder[4];
1145		zip_searchorder[4] = tmp;
1146	}
1147
1148	mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1149			     NULL, PYTHON_API_VERSION);
1150
1151	ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1152					    PyExc_ImportError, NULL);
1153	if (ZipImportError == NULL)
1154		return;
1155
1156	Py_INCREF(ZipImportError);
1157	if (PyModule_AddObject(mod, "ZipImportError",
1158			       ZipImportError) < 0)
1159		return;
1160
1161	Py_INCREF(&ZipImporter_Type);
1162	if (PyModule_AddObject(mod, "zipimporter",
1163			       (PyObject *)&ZipImporter_Type) < 0)
1164		return;
1165
1166	zip_directory_cache = PyDict_New();
1167	if (zip_directory_cache == NULL)
1168		return;
1169	Py_INCREF(zip_directory_cache);
1170	if (PyModule_AddObject(mod, "_zip_directory_cache",
1171			       zip_directory_cache) < 0)
1172		return;
1173}
1174