zipimport.c revision a94568a7535de60f1144e4eea0d027b87017a4b4
1#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
5#include "compile.h"
6#include <time.h>
7
8
/* Bit flags describing a zip_searchorder entry. They are OR-ed together
   in the 'type' field of struct st_zip_searchorder; IS_SOURCE is 0, so
   "source" simply means that IS_BYTECODE is not set. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2
12
/* One entry of the module search table: a file name suffix plus the
   IS_* flags telling what kind of file that suffix denotes. */
struct st_zip_searchorder {
	char suffix[14];	/* longest suffix, "/__init__.pyc", is 13 chars + NUL */
	int type;		/* combination of IS_SOURCE, IS_BYTECODE, IS_PACKAGE */
};
17
/* zip_searchorder defines how we search for a module in the Zip
   archive: we first search for a package __init__, then for
   non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
   are swapped by initzipimport() if we run in optimized mode. Also,
   '/' is replaced by SEP there.
   The table is terminated by an entry with an empty suffix; the lookup
   loops in this file stop when they reach it. */
static struct st_zip_searchorder zip_searchorder[] = {
	{"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
	{"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
	{"/__init__.py", IS_PACKAGE | IS_SOURCE},
	{".pyc", IS_BYTECODE},
	{".pyo", IS_BYTECODE},
	{".py", IS_SOURCE},
	{"", 0}
};
32
/* zipimporter object definition and support */

typedef struct _zipimporter ZipImporter;

/* Instance layout of a zipimporter. All three members are filled in by
   zipimporter_init() and released by zipimporter_dealloc(). */
struct _zipimporter {
	PyObject_HEAD
	PyObject *archive;  /* pathname of the Zip archive */
	PyObject *prefix;   /* file prefix: "a/sub/directory/" */
	PyObject *files;    /* dict with file info {path: toc_entry} */
};
43
static PyTypeObject ZipImporter_Type;
/* Exception class; created in initzipimport() as a subclass of
   ImportError. */
static PyObject *ZipImportError;
/* Dict mapping an archive path to the files dict built by
   read_directory(); created in initzipimport() and consulted by
   zipimporter_init() so that an archive's directory is read only once. */
static PyObject *zip_directory_cache = NULL;

/* forward decls */
static PyObject *read_directory(char *archive);
static PyObject *get_data(char *archive, PyObject *toc_entry);
static PyObject *get_module_code(ZipImporter *self, char *fullname,
				 int *p_ispackage, char **p_modpath);


/* True if 'op' is an instance of zipimporter (or of a subtype). */
#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
56
57
58/* zipimporter.__init__
59   Split the "subdirectory" from the Zip archive path, lookup a matching
60   entry in sys.path_importer_cache, fetch the file directory from there
61   if found, or else read it from the archive. */
62static int
63zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
64{
65	char *path, *p, *prefix, buf[MAXPATHLEN+2];
66	int len;
67
68	if (!PyArg_ParseTuple(args, "s:zipimporter",
69			      &path))
70		return -1;
71
72	len = strlen(path);
73	if (len == 0) {
74		PyErr_SetString(ZipImportError, "archive path is empty");
75		return -1;
76	}
77	if (len >= MAXPATHLEN) {
78		PyErr_SetString(ZipImportError,
79				"archive path too long");
80		return -1;
81	}
82	strcpy(buf, path);
83
84#ifdef ALTSEP
85	for (p = buf; *p; p++) {
86		if (*p == ALTSEP)
87			*p = SEP;
88	}
89#endif
90
91	path = NULL;
92	prefix = NULL;
93	for (;;) {
94#ifndef RISCOS
95		struct stat statbuf;
96		int rv;
97
98		rv = stat(buf, &statbuf);
99		if (rv == 0) {
100			/* it exists */
101			if (S_ISREG(statbuf.st_mode))
102				/* it's a file */
103				path = buf;
104			break;
105		}
106#else
107		if (object_exists(buf)) {
108			/* it exists */
109			if (isfile(buf))
110				/* it's a file */
111				path = buf;
112			break;
113		}
114#endif
115		/* back up one path element */
116		p = strrchr(buf, SEP);
117		if (prefix != NULL)
118			*prefix = SEP;
119		if (p == NULL)
120			break;
121		*p = '\0';
122		prefix = p;
123	}
124	if (path != NULL) {
125		PyObject *files;
126		files = PyDict_GetItemString(zip_directory_cache, path);
127		if (files == NULL) {
128			files = read_directory(buf);
129			if (files == NULL)
130				return -1;
131			if (PyDict_SetItemString(zip_directory_cache, path,
132						 files) != 0)
133				return -1;
134		}
135		else
136			Py_INCREF(files);
137		self->files = files;
138	}
139	else {
140		PyErr_SetString(ZipImportError, "not a Zip file");
141		return -1;
142	}
143
144	if (prefix == NULL)
145		prefix = "";
146	else {
147		prefix++;
148		len = strlen(prefix);
149		if (prefix[len-1] != SEP) {
150			/* add trailing SEP */
151			prefix[len] = SEP;
152			prefix[len + 1] = '\0';
153		}
154	}
155
156	self->archive = PyString_FromString(buf);
157	if (self->archive == NULL)
158		return -1;
159
160	self->prefix = PyString_FromString(prefix);
161	if (self->prefix == NULL)
162		return -1;
163
164	return 0;
165}
166
167/* GC support. */
168static int
169zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
170{
171	ZipImporter *self = (ZipImporter *)obj;
172	int err;
173
174	if (self->files != NULL) {
175		err = visit(self->files, arg);
176		if (err)
177			return err;
178	}
179	return 0;
180}
181
182static void
183zipimporter_dealloc(ZipImporter *self)
184{
185	PyObject_GC_UnTrack(self);
186	Py_XDECREF(self->archive);
187	Py_XDECREF(self->prefix);
188	Py_XDECREF(self->files);
189	self->ob_type->tp_free((PyObject *)self);
190}
191
192static PyObject *
193zipimporter_repr(ZipImporter *self)
194{
195	char buf[500];
196	char *archive = "???";
197	char *prefix = "";
198
199	if (self->archive != NULL && PyString_Check(self->archive))
200		archive = PyString_AsString(self->archive);
201	if (self->prefix != NULL && PyString_Check(self->prefix))
202		prefix = PyString_AsString(self->prefix);
203	if (prefix != NULL && *prefix)
204		PyOS_snprintf(buf, sizeof(buf),
205			      "<zipimporter object \"%.300s%c%.150s\">",
206			      archive, SEP, prefix);
207	else
208		PyOS_snprintf(buf, sizeof(buf),
209			      "<zipimporter object \"%.300s\">",
210			      archive);
211	return PyString_FromString(buf);
212}
213
/* Return the last component of a dotted module name, i.e.
   fullname.split(".")[-1]. The result points into 'fullname'. */
static char *
get_subname(char *fullname)
{
	char *last_dot = strrchr(fullname, '.');

	return (last_dot != NULL) ? last_dot + 1 : fullname;
}
225
226/* Given a (sub)modulename, write the potential file path in the
227   archive (without extension) to the path buffer. Return the
228   length of the resulting string. */
229static int
230make_filename(char *prefix, char *name, char *path)
231{
232	int len;
233	char *p;
234
235	len = strlen(prefix);
236
237	/* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
238	if (len + strlen(name) + 13 >= MAXPATHLEN) {
239		PyErr_SetString(ZipImportError, "path too long");
240		return -1;
241	}
242
243	strcpy(path, prefix);
244	strcpy(path + len, name);
245	for (p = path + len; *p; p++) {
246		if (*p == '.')
247			*p = SEP;
248	}
249	len += strlen(name);
250	return len;
251}
252
/* Result codes of get_module_info(). */
enum module_info {
	MI_ERROR,	/* failure; an exception has been set */
	MI_NOT_FOUND,	/* no matching entry in the archive */
	MI_MODULE,	/* found as a plain (non-package) module */
	MI_PACKAGE	/* found as a package (an __init__ entry) */
};
259
260/* Return some information about a module. */
261static enum module_info
262get_module_info(ZipImporter *self, char *fullname)
263{
264	char *subname, path[MAXPATHLEN + 1];
265	int len;
266	struct st_zip_searchorder *zso;
267
268	subname = get_subname(fullname);
269
270	len = make_filename(PyString_AsString(self->prefix), subname, path);
271	if (len < 0)
272		return MI_ERROR;
273
274	for (zso = zip_searchorder; *zso->suffix; zso++) {
275		strcpy(path + len, zso->suffix);
276		if (PyDict_GetItemString(self->files, path) != NULL) {
277			if (zso->type & IS_PACKAGE)
278				return MI_PACKAGE;
279			else
280				return MI_MODULE;
281		}
282	}
283	return MI_NOT_FOUND;
284}
285
286/* Check whether we can satisfy the import of the module named by
287   'fullname'. Return self if we can, None if we can't. */
288static PyObject *
289zipimporter_find_module(PyObject *obj, PyObject *args)
290{
291	ZipImporter *self = (ZipImporter *)obj;
292	PyObject *path = NULL;
293	char *fullname;
294	enum module_info mi;
295
296	if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
297			      &fullname, &path))
298		return NULL;
299
300	mi = get_module_info(self, fullname);
301	if (mi == MI_ERROR)
302		return NULL;
303	if (mi == MI_NOT_FOUND) {
304		Py_INCREF(Py_None);
305		return Py_None;
306	}
307	Py_INCREF(self);
308	return (PyObject *)self;
309}
310
311/* Load and return the module named by 'fullname'. */
312static PyObject *
313zipimporter_load_module(PyObject *obj, PyObject *args)
314{
315	ZipImporter *self = (ZipImporter *)obj;
316	PyObject *code, *mod, *dict;
317	char *fullname, *modpath;
318	int ispackage;
319
320	if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
321			      &fullname))
322		return NULL;
323
324	code = get_module_code(self, fullname, &ispackage, &modpath);
325	if (code == NULL)
326		return NULL;
327
328	mod = PyImport_AddModule(fullname);
329	if (mod == NULL) {
330		Py_DECREF(code);
331		return NULL;
332	}
333	dict = PyModule_GetDict(mod);
334
335	/* mod.__loader__ = self */
336	if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
337		goto error;
338
339	if (ispackage) {
340		/* add __path__ to the module *before* the code gets
341		   executed */
342		PyObject *pkgpath, *fullpath;
343		char *prefix = PyString_AsString(self->prefix);
344		char *subname = get_subname(fullname);
345		int err;
346
347		fullpath = PyString_FromFormat("%s%c%s%s",
348					PyString_AsString(self->archive),
349					SEP,
350					*prefix ? prefix : "",
351					subname);
352		if (fullpath == NULL)
353			goto error;
354
355		pkgpath = Py_BuildValue("[O]", fullpath);
356		Py_DECREF(fullpath);
357		if (pkgpath == NULL)
358			goto error;
359		err = PyDict_SetItemString(dict, "__path__", pkgpath);
360		Py_DECREF(pkgpath);
361		if (err != 0)
362			goto error;
363	}
364	mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
365	Py_DECREF(code);
366	if (Py_VerboseFlag)
367		PySys_WriteStderr("import %s # loaded from Zip %s\n",
368				  fullname, modpath);
369	return mod;
370error:
371	Py_DECREF(code);
372	Py_DECREF(mod);
373	return NULL;
374}
375
376/* Return a bool signifying whether the module is a package or not. */
377static PyObject *
378zipimporter_is_package(PyObject *obj, PyObject *args)
379{
380	ZipImporter *self = (ZipImporter *)obj;
381	char *fullname;
382	enum module_info mi;
383
384	if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
385			      &fullname))
386		return NULL;
387
388	mi = get_module_info(self, fullname);
389	if (mi == MI_ERROR)
390		return NULL;
391	if (mi == MI_NOT_FOUND) {
392		PyErr_Format(ZipImportError, "can't find module '%.200s'",
393			     fullname);
394		return NULL;
395	}
396	return PyBool_FromLong(mi == MI_PACKAGE);
397}
398
399static PyObject *
400zipimporter_get_data(PyObject *obj, PyObject *args)
401{
402	ZipImporter *self = (ZipImporter *)obj;
403	char *path;
404#ifdef ALTSEP
405	char *p, buf[MAXPATHLEN + 1];
406#endif
407	PyObject *toc_entry;
408	int len;
409
410	if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
411		return NULL;
412
413#ifdef ALTSEP
414	if (strlen(path) >= MAXPATHLEN) {
415		PyErr_SetString(ZipImportError, "path too long");
416		return NULL;
417	}
418	strcpy(buf, path);
419	for (p = buf; *p; p++) {
420		if (*p == ALTSEP)
421			*p = SEP;
422	}
423	path = buf;
424#endif
425	len = PyString_Size(self->archive);
426	if ((size_t)len < strlen(path) &&
427	    strncmp(path, PyString_AsString(self->archive), len) == 0 &&
428	    path[len] == SEP) {
429		path = path + len + 1;
430	}
431
432	toc_entry = PyDict_GetItemString(self->files, path);
433	if (toc_entry == NULL) {
434		PyErr_Format(PyExc_IOError, "file not found [%.200s]",
435			     path);
436		return NULL;
437	}
438	return get_data(PyString_AsString(self->archive), toc_entry);
439}
440
441static PyObject *
442zipimporter_get_code(PyObject *obj, PyObject *args)
443{
444	ZipImporter *self = (ZipImporter *)obj;
445	char *fullname;
446
447	if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
448		return NULL;
449
450	return get_module_code(self, fullname, NULL, NULL);
451}
452
453static PyObject *
454zipimporter_get_source(PyObject *obj, PyObject *args)
455{
456	ZipImporter *self = (ZipImporter *)obj;
457	PyObject *toc_entry;
458	char *fullname, *subname, path[MAXPATHLEN+1];
459	int len;
460	enum module_info mi;
461
462	if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
463		return NULL;
464
465	mi = get_module_info(self, fullname);
466	if (mi == MI_ERROR)
467		return NULL;
468	if (mi == MI_NOT_FOUND) {
469		PyErr_Format(ZipImportError, "can't find module '%.200s'",
470			     fullname);
471		return NULL;
472	}
473	subname = get_subname(fullname);
474
475	len = make_filename(PyString_AsString(self->prefix), subname, path);
476	if (len < 0)
477		return NULL;
478
479	if (mi == MI_PACKAGE) {
480		path[len] = SEP;
481		strcpy(path + len + 1, "__init__.py");
482	}
483	else
484		strcpy(path + len, ".py");
485
486	toc_entry = PyDict_GetItemString(self->files, path);
487	if (toc_entry != NULL)
488		return get_data(PyString_AsString(self->archive), toc_entry);
489
490	/* we have the module, but no source */
491	Py_INCREF(Py_None);
492	return Py_None;
493}
494
/* Docstrings for the zipimporter methods; they are attached to the
   methods through the zipimporter_methods table. */
PyDoc_STRVAR(doc_find_module,
"find_module(fullname, path=None) -> self or None.\n\
\n\
Search for a module specified by 'fullname'. 'fullname' must be the\n\
fully qualified (dotted) module name. It returns the zipimporter\n\
instance itself if the module was found, or None if it wasn't.\n\
The optional 'path' argument is ignored -- it's there for compatibility\n\
with the importer protocol.");

PyDoc_STRVAR(doc_load_module,
"load_module(fullname) -> module.\n\
\n\
Load the module specified by 'fullname'. 'fullname' must be the\n\
fully qualified (dotted) module name. It returns the imported\n\
module, or raises ZipImportError if it wasn't found.");

PyDoc_STRVAR(doc_get_data,
"get_data(pathname) -> string with file data.\n\
\n\
Return the data associated with 'pathname'. Raise IOError if\n\
the file wasn't found.");
516
517PyDoc_STRVAR(doc_is_package,
518"is_package(fullname) -> bool.\n\
519\n\
520Return True if the module specified by fullname is a package.\n\
521Raise ZipImportError is the module couldn't be found.");
522
523PyDoc_STRVAR(doc_get_code,
524"get_code(fullname) -> code object.\n\
525\n\
526Return the code object for the specified module. Raise ZipImportError\n\
527is the module couldn't be found.");
528
529PyDoc_STRVAR(doc_get_source,
530"get_source(fullname) -> source string.\n\
531\n\
532Return the source code for the specified module. Raise ZipImportError\n\
533is the module couldn't be found, return None if the archive does\n\
534contain the module, but has no source for it.");
535
/* Method table of the zipimporter type: Python-level name, C
   implementation, calling convention and docstring. */
static PyMethodDef zipimporter_methods[] = {
	{"find_module", zipimporter_find_module, METH_VARARGS,
	 doc_find_module},
	{"load_module", zipimporter_load_module, METH_VARARGS,
	 doc_load_module},
	{"get_data", zipimporter_get_data, METH_VARARGS,
	 doc_get_data},
	{"get_code", zipimporter_get_code, METH_VARARGS,
	 doc_get_code},
	{"get_source", zipimporter_get_source, METH_VARARGS,
	 doc_get_source},
	{"is_package", zipimporter_is_package, METH_VARARGS,
	 doc_is_package},
	{NULL,		NULL}	/* sentinel */
};
551
/* Read-only attributes exposing the three members of the ZipImporter
   struct; note that the 'files' dict is published as '_files'. */
static PyMemberDef zipimporter_members[] = {
	{"archive",  T_OBJECT, offsetof(ZipImporter, archive),  READONLY},
	{"prefix",   T_OBJECT, offsetof(ZipImporter, prefix),   READONLY},
	{"_files",   T_OBJECT, offsetof(ZipImporter, files),    READONLY},
	{NULL}	/* sentinel */
};
558
/* Docstring of the zipimporter type itself (used as tp_doc). */
PyDoc_STRVAR(zipimporter_doc,
"zipimporter(archivepath) -> zipimporter object\n\
\n\
Create a new zipimporter instance. 'archivepath' must be a path to\n\
a zipfile. ZipImportError is raised if 'archivepath' doesn't point to\n\
a valid Zip archive.");
565
/* Always expands to 0: the type pointer in the static initializer below
   is left NULL. initzipimport() calls PyType_Ready() on the type before
   it is put into the module. */
#define DEFERRED_ADDRESS(ADDR) 0

/* The zipimporter type object. It can be subclassed
   (Py_TPFLAGS_BASETYPE) and takes part in cyclic garbage collection
   (Py_TPFLAGS_HAVE_GC together with tp_traverse). */
static PyTypeObject ZipImporter_Type = {
	PyObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type))
	0,
	"zipimport.zipimporter",
	sizeof(ZipImporter),
	0,					/* tp_itemsize */
	(destructor)zipimporter_dealloc,	/* tp_dealloc */
	0,					/* tp_print */
	0,					/* tp_getattr */
	0,					/* tp_setattr */
	0,					/* tp_compare */
	(reprfunc)zipimporter_repr,		/* tp_repr */
	0,					/* tp_as_number */
	0,					/* tp_as_sequence */
	0,					/* tp_as_mapping */
	0,					/* tp_hash */
	0,					/* tp_call */
	0,					/* tp_str */
	PyObject_GenericGetAttr,		/* tp_getattro */
	0,					/* tp_setattro */
	0,					/* tp_as_buffer */
	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
		Py_TPFLAGS_HAVE_GC,		/* tp_flags */
	zipimporter_doc,			/* tp_doc */
	zipimporter_traverse,			/* tp_traverse */
	0,					/* tp_clear */
	0,					/* tp_richcompare */
	0,					/* tp_weaklistoffset */
	0,					/* tp_iter */
	0,					/* tp_iternext */
	zipimporter_methods,			/* tp_methods */
	zipimporter_members,			/* tp_members */
	0,					/* tp_getset */
	0,					/* tp_base */
	0,					/* tp_dict */
	0,					/* tp_descr_get */
	0,					/* tp_descr_set */
	0,					/* tp_dictoffset */
	(initproc)zipimporter_init,		/* tp_init */
	PyType_GenericAlloc,			/* tp_alloc */
	PyType_GenericNew,			/* tp_new */
	PyObject_GC_Del,			/* tp_free */
};
611
612
613/* implementation */
614
/* Decode the first 4 bytes of 'buf' as a little-endian 32-bit integer
   and return it as a long. Where long is wider than 4 bytes the value
   is sign-extended from bit 31. This partially reimplements
   marshal.c:r_long(). */
static long
get_long(unsigned char *buf) {
	long value = 0;
	int i;

	/* fold the bytes in from the most to the least significant */
	for (i = 3; i >= 0; i--)
		value = (value << 8) | (long)buf[i];
#if SIZEOF_LONG > 4
	/* Sign extension for 64-bit machines */
	value |= -(value & 0x80000000L);
#endif
	return value;
}
631
632/*
633   read_directory(archive) -> files dict (new reference)
634
635   Given a path to a Zip archive, build a dict, mapping file names
636   (local to the archive, using SEP as a separator) to toc entries.
637
638   A toc_entry is a tuple:
639
640       (compress,      # compression kind; 0 for uncompressed
641        data_size,     # size of compressed data on disk
642        file_size,     # size of decompressed data
643        file_offset,   # offset of file header from start of archive
644        time,          # mod time of file (in dos format)
645        date,          # mod data of file (in dos format)
646        crc,           # crc checksum of the data
647       )
648
649   Directories can be recognized by the trailing SEP in the name,
650   data_size and file_offset are 0.
651*/
652static PyObject *
653read_directory(char *archive)
654{
655	PyObject *files = NULL;
656	FILE *fp;
657	long compress, crc, data_size, file_size, file_offset, date, time;
658	long header_offset, name_size, header_size, header_end;
659	long i, l, length, count;
660	char path[MAXPATHLEN + 5];
661	char name[MAXPATHLEN + 5];
662	char *p, endof_central_dir[22];
663
664	if (strlen(archive) > MAXPATHLEN) {
665		PyErr_SetString(PyExc_OverflowError,
666				"Zip path name is too long");
667		return NULL;
668	}
669	strcpy(path, archive);
670
671	fp = fopen(archive, "rb");
672	if (fp == NULL) {
673		PyErr_Format(ZipImportError, "can't open Zip file: "
674			     "'%.200s'", archive);
675		return NULL;
676	}
677	fseek(fp, -22, SEEK_END);
678	header_end = ftell(fp);
679	if (fread(endof_central_dir, 1, 22, fp) != 22) {
680		fclose(fp);
681		PyErr_Format(ZipImportError, "can't read Zip file: "
682			     "'%.200s'", archive);
683		return NULL;
684	}
685	if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
686		/* Bad: End of Central Dir signature */
687		fclose(fp);
688		PyErr_Format(ZipImportError, "not a Zip file: "
689			     "'%.200s'", archive);
690		return NULL;
691	}
692
693	header_offset = get_long((unsigned char *)endof_central_dir + 16);
694
695	files = PyDict_New();
696	if (files == NULL)
697		goto error;
698
699	length = (long)strlen(path);
700	path[length] = SEP;
701
702	/* Start of Central Directory */
703	count = 0;
704	for (;;) {
705		PyObject *t;
706		int err;
707
708		fseek(fp, header_offset, 0);  /* Start of file header */
709		l = PyMarshal_ReadLongFromFile(fp);
710		if (l != 0x02014B50)
711			break;	/* Bad: Central Dir File Header */
712		fseek(fp, header_offset + 10, 0);
713		compress = PyMarshal_ReadShortFromFile(fp);
714		time = PyMarshal_ReadShortFromFile(fp);
715		date = PyMarshal_ReadShortFromFile(fp);
716		crc = PyMarshal_ReadLongFromFile(fp);
717		data_size = PyMarshal_ReadLongFromFile(fp);
718		file_size = PyMarshal_ReadLongFromFile(fp);
719		name_size = PyMarshal_ReadShortFromFile(fp);
720		header_size = 46 + name_size +
721		   PyMarshal_ReadShortFromFile(fp) +
722		   PyMarshal_ReadShortFromFile(fp);
723		fseek(fp, header_offset + 42, 0);
724		file_offset = PyMarshal_ReadLongFromFile(fp);
725		if (name_size > MAXPATHLEN)
726			name_size = MAXPATHLEN;
727
728		p = name;
729		for (i = 0; i < name_size; i++) {
730			*p = (char)getc(fp);
731			if (*p == '/')
732				*p = SEP;
733			p++;
734		}
735		*p = 0;	/* Add terminating null byte */
736		header_offset += header_size;
737
738		strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
739
740		t = Py_BuildValue("siiiiiii", path, compress, data_size,
741				  file_size, file_offset, time, date, crc);
742		if (t == NULL)
743			goto error;
744		err = PyDict_SetItemString(files, name, t);
745		Py_DECREF(t);
746		if (err != 0)
747			goto error;
748		count++;
749	}
750	fclose(fp);
751	if (Py_VerboseFlag)
752		PySys_WriteStderr("# zipimport: found %ld names in %s\n",
753			count, archive);
754	return files;
755error:
756	fclose(fp);
757	Py_XDECREF(files);
758	return NULL;
759}
760
761/* Return the zlib.decompress function object, or NULL if zlib couldn't
762   be imported. The function is cached when found, so subsequent calls
763   don't import zlib again. Returns a *borrowed* reference.
764   XXX This makes zlib.decompress immortal. */
765static PyObject *
766get_decompress_func(void)
767{
768	static PyObject *decompress = NULL;
769
770	if (decompress == NULL) {
771		PyObject *zlib;
772		static int importing_zlib = 0;
773
774		if (importing_zlib != 0)
775			/* Someone has a zlib.py[co] in their Zip file;
776			   let's avoid a stack overflow. */
777			return NULL;
778		importing_zlib = 1;
779		zlib = PyImport_ImportModule("zlib");	/* import zlib */
780		importing_zlib = 0;
781		if (zlib != NULL) {
782			decompress = PyObject_GetAttrString(zlib,
783							    "decompress");
784			Py_DECREF(zlib);
785		}
786		else
787			PyErr_Clear();
788		if (Py_VerboseFlag)
789			PySys_WriteStderr("# zipimport: zlib %s\n",
790				zlib != NULL ? "available": "UNAVAILABLE");
791	}
792	return decompress;
793}
794
795/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
796   data as a new reference. */
797static PyObject *
798get_data(char *archive, PyObject *toc_entry)
799{
800	PyObject *raw_data, *data = NULL, *decompress;
801	char *buf;
802	FILE *fp;
803	int err, bytes_read = 0;
804	long l;
805	char *datapath;
806	long compress, data_size, file_size, file_offset;
807	long time, date, crc;
808
809	if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
810			      &data_size, &file_size, &file_offset, &time,
811			      &date, &crc)) {
812		return NULL;
813	}
814
815	fp = fopen(archive, "rb");
816	if (!fp) {
817		PyErr_Format(PyExc_IOError,
818		   "zipimport: can not open file %s", archive);
819		return NULL;
820	}
821
822	/* Check to make sure the local file header is correct */
823	fseek(fp, file_offset, 0);
824	l = PyMarshal_ReadLongFromFile(fp);
825	if (l != 0x04034B50) {
826		/* Bad: Local File Header */
827		PyErr_Format(ZipImportError,
828			     "bad local file header in %s",
829			     archive);
830		fclose(fp);
831		return NULL;
832	}
833	fseek(fp, file_offset + 26, 0);
834	l = 30 + PyMarshal_ReadShortFromFile(fp) +
835	    PyMarshal_ReadShortFromFile(fp);	/* local header size */
836	file_offset += l;	/* Start of file data */
837
838	raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
839					      data_size : data_size + 1);
840	if (raw_data == NULL) {
841		fclose(fp);
842		return NULL;
843	}
844	buf = PyString_AsString(raw_data);
845
846	err = fseek(fp, file_offset, 0);
847	if (err == 0)
848		bytes_read = fread(buf, 1, data_size, fp);
849	fclose(fp);
850	if (err || bytes_read != data_size) {
851		PyErr_SetString(PyExc_IOError,
852				"zipimport: can't read data");
853		Py_DECREF(raw_data);
854		return NULL;
855	}
856
857	if (compress != 0) {
858		buf[data_size] = 'Z';  /* saw this in zipfile.py */
859		data_size++;
860	}
861	buf[data_size] = '\0';
862
863	if (compress == 0)  /* data is not compressed */
864		return raw_data;
865
866	/* Decompress with zlib */
867	decompress = get_decompress_func();
868	if (decompress == NULL) {
869		PyErr_SetString(ZipImportError,
870				"can't decompress data; "
871				"zlib not available");
872		goto error;
873	}
874	data = PyObject_CallFunction(decompress, "Ol", raw_data, -15);
875error:
876	Py_DECREF(raw_data);
877	return data;
878}
879
/* Lenient comparison of two time stamps: returns true if they differ by
   at most one second. The mtime kept in the archive has a lower
   precision than the one stored in a .pyc (dostime only stores even
   seconds), so an exact match can't be demanded. */
static int
eq_mtime(time_t t1, time_t t2)
{
	time_t delta = t1 - t2;

	return (delta < 0 ? -delta : delta) <= 1;
}
892
893/* Given the contents of a .py[co] file in a buffer, unmarshal the data
894   and return the code object. Return None if it the magic word doesn't
895   match (we do this instead of raising an exception as we fall back
896   to .py if available and we don't want to mask other errors).
897   Returns a new reference. */
898static PyObject *
899unmarshal_code(char *pathname, PyObject *data, time_t mtime)
900{
901	PyObject *code;
902	char *buf = PyString_AsString(data);
903	int size = PyString_Size(data);
904
905	if (size <= 9) {
906		PyErr_SetString(ZipImportError,
907				"bad pyc data");
908		return NULL;
909	}
910
911	if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
912		if (Py_VerboseFlag)
913			PySys_WriteStderr("# %s has bad magic\n",
914					  pathname);
915		Py_INCREF(Py_None);
916		return Py_None;  /* signal caller to try alternative */
917	}
918
919	if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
920				    mtime)) {
921		if (Py_VerboseFlag)
922			PySys_WriteStderr("# %s has bad mtime\n",
923					  pathname);
924		Py_INCREF(Py_None);
925		return Py_None;  /* signal caller to try alternative */
926	}
927
928	code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
929	if (code == NULL)
930		return NULL;
931	if (!PyCode_Check(code)) {
932		Py_DECREF(code);
933		PyErr_Format(PyExc_TypeError,
934		     "compiled module %.200s is not a code object",
935		     pathname);
936		return NULL;
937	}
938	return code;
939}
940
941/* Replace any occurances of "\r\n?" in the input string with "\n".
942   This converts DOS and Mac line endings to Unix line endings.
943   Also append a trailing "\n" to be compatible with
944   PyParser_SimpleParseFile(). Returns a new reference. */
945static PyObject *
946normalize_line_endings(PyObject *source)
947{
948	char *buf, *q, *p = PyString_AsString(source);
949	PyObject *fixed_source;
950
951	/* one char extra for trailing \n and one for terminating \0 */
952	buf = PyMem_Malloc(PyString_Size(source) + 2);
953	if (buf == NULL) {
954		PyErr_SetString(PyExc_MemoryError,
955				"zipimport: no memory to allocate "
956				"source buffer");
957		return NULL;
958	}
959	/* replace "\r\n?" by "\n" */
960	for (q = buf; *p != '\0'; p++) {
961		if (*p == '\r') {
962			*q++ = '\n';
963			if (*(p + 1) == '\n')
964				p++;
965		}
966		else
967			*q++ = *p;
968	}
969	*q++ = '\n';  /* add trailing \n */
970	*q = '\0';
971	fixed_source = PyString_FromString(buf);
972	PyMem_Free(buf);
973	return fixed_source;
974}
975
976/* Given a string buffer containing Python source code, compile it
977   return and return a code object as a new reference. */
978static PyObject *
979compile_source(char *pathname, PyObject *source)
980{
981	PyObject *code, *fixed_source;
982
983	fixed_source = normalize_line_endings(source);
984	if (fixed_source == NULL)
985		return NULL;
986
987	code = Py_CompileString(PyString_AsString(fixed_source), pathname,
988				Py_file_input);
989	Py_DECREF(fixed_source);
990	return code;
991}
992
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   'dostime' holds seconds/2 in bits 0-4, minutes in bits 5-10 and hours
   in bits 11-15; 'dosdate' holds the day in bits 0-4, the month (1-12)
   in bits 5-8 and the year since 1980 in bits 9-15. The fields are
   taken as local time and converted with mktime(). */
static time_t
parse_dostime(int dostime, int dosdate)
{
	struct tm stm;

	/* struct tm may have more members than the ones set below; don't
	   hand indeterminate values to mktime() */
	memset(&stm, 0, sizeof(stm));

	stm.tm_sec   =  (dostime        & 0x1f) * 2;
	stm.tm_min   =  (dostime >> 5)  & 0x3f;
	stm.tm_hour  =  (dostime >> 11) & 0x1f;
	stm.tm_mday  =   dosdate        & 0x1f;
	stm.tm_mon   = ((dosdate >> 5)  & 0x0f) - 1;
	stm.tm_year  = ((dosdate >> 9)  & 0x7f) + 80;
	stm.tm_isdst =   -1; /* wday/yday is ignored */

	return mktime(&stm);
}
1010
1011/* Given a path to a .pyc or .pyo file in the archive, return the
1012   modifictaion time of the matching .py file, or 0 if no source
1013   is available. */
1014static time_t
1015get_mtime_of_source(ZipImporter *self, char *path)
1016{
1017	PyObject *toc_entry;
1018	time_t mtime = 0;
1019	int lastchar = strlen(path) - 1;
1020	char savechar = path[lastchar];
1021	path[lastchar] = '\0';  /* strip 'c' or 'o' from *.py[co] */
1022	toc_entry = PyDict_GetItemString(self->files, path);
1023	if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1024	    PyTuple_Size(toc_entry) == 8) {
1025		/* fetch the time stamp of the .py file for comparison
1026		   with an embedded pyc time stamp */
1027		int time, date;
1028		time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1029		date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1030		mtime = parse_dostime(time, date);
1031	}
1032	path[lastchar] = savechar;
1033	return mtime;
1034}
1035
1036/* Return the code object for the module named by 'fullname' from the
1037   Zip archive as a new reference. */
1038static PyObject *
1039get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1040		   time_t mtime, PyObject *toc_entry)
1041{
1042	PyObject *data, *code;
1043	char *modpath;
1044	char *archive = PyString_AsString(self->archive);
1045
1046	if (archive == NULL)
1047		return NULL;
1048
1049	data = get_data(archive, toc_entry);
1050	if (data == NULL)
1051		return NULL;
1052
1053	modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1054
1055	if (isbytecode) {
1056		code = unmarshal_code(modpath, data, mtime);
1057	}
1058	else {
1059		code = compile_source(modpath, data);
1060	}
1061	Py_DECREF(data);
1062	return code;
1063}
1064
1065/* Get the code object assoiciated with the module specified by
1066   'fullname'. */
1067static PyObject *
1068get_module_code(ZipImporter *self, char *fullname,
1069		int *p_ispackage, char **p_modpath)
1070{
1071	PyObject *toc_entry;
1072	char *subname, path[MAXPATHLEN + 1];
1073	int len;
1074	struct st_zip_searchorder *zso;
1075
1076	subname = get_subname(fullname);
1077
1078	len = make_filename(PyString_AsString(self->prefix), subname, path);
1079	if (len < 0)
1080		return NULL;
1081
1082	for (zso = zip_searchorder; *zso->suffix; zso++) {
1083		PyObject *code = NULL;
1084
1085		strcpy(path + len, zso->suffix);
1086		if (Py_VerboseFlag > 1)
1087			PySys_WriteStderr("# trying %s%c%s\n",
1088					  PyString_AsString(self->archive),
1089					  SEP, path);
1090		toc_entry = PyDict_GetItemString(self->files, path);
1091		if (toc_entry != NULL) {
1092			time_t mtime = 0;
1093			int ispackage = zso->type & IS_PACKAGE;
1094			int isbytecode = zso->type & IS_BYTECODE;
1095
1096			if (isbytecode)
1097				mtime = get_mtime_of_source(self, path);
1098			if (p_ispackage != NULL)
1099				*p_ispackage = ispackage;
1100			code = get_code_from_data(self, ispackage,
1101						  isbytecode, mtime,
1102						  toc_entry);
1103			if (code == Py_None) {
1104				/* bad magic number or non-matching mtime
1105				   in byte code, try next */
1106				Py_DECREF(code);
1107				continue;
1108			}
1109			if (code != NULL && p_modpath != NULL)
1110				*p_modpath = PyString_AsString(
1111					PyTuple_GetItem(toc_entry, 0));
1112			return code;
1113		}
1114	}
1115	PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1116	return NULL;
1117}
1118
1119
1120/* Module init */
1121
1122PyDoc_STRVAR(zipimport_doc,
1123"zipimport provides support for importing Python modules from Zip archives.\n\
1124\n\
1125This module exports three objects:\n\
1126- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1127- ZipImporterError: exception raised by zipimporter objects. It's a\n\
1128  subclass of ImportError, so it can be caught as ImportError, too.\n\
1129- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1130  info dicts, as used in zipimporter._files.\n\
1131\n\
1132It is usually not needed to use the zipimport module explicitly; it is\n\
1133used by the builtin import mechanism for sys.path items that are paths\n\
1134to Zip archives.");
1135
1136PyMODINIT_FUNC
1137initzipimport(void)
1138{
1139	PyObject *mod;
1140
1141	if (PyType_Ready(&ZipImporter_Type) < 0)
1142		return;
1143
1144	/* Correct directory separator */
1145	zip_searchorder[0].suffix[0] = SEP;
1146	zip_searchorder[1].suffix[0] = SEP;
1147	zip_searchorder[2].suffix[0] = SEP;
1148	if (Py_OptimizeFlag) {
1149		/* Reverse *.pyc and *.pyo */
1150		struct st_zip_searchorder tmp;
1151		tmp = zip_searchorder[0];
1152		zip_searchorder[0] = zip_searchorder[1];
1153		zip_searchorder[1] = tmp;
1154		tmp = zip_searchorder[3];
1155		zip_searchorder[3] = zip_searchorder[4];
1156		zip_searchorder[4] = tmp;
1157	}
1158
1159	mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1160			     NULL, PYTHON_API_VERSION);
1161
1162	ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1163					    PyExc_ImportError, NULL);
1164	if (ZipImportError == NULL)
1165		return;
1166
1167	Py_INCREF(ZipImportError);
1168	if (PyModule_AddObject(mod, "ZipImportError",
1169			       ZipImportError) < 0)
1170		return;
1171
1172	Py_INCREF(&ZipImporter_Type);
1173	if (PyModule_AddObject(mod, "zipimporter",
1174			       (PyObject *)&ZipImporter_Type) < 0)
1175		return;
1176
1177	zip_directory_cache = PyDict_New();
1178	if (zip_directory_cache == NULL)
1179		return;
1180	Py_INCREF(zip_directory_cache);
1181	if (PyModule_AddObject(mod, "_zip_directory_cache",
1182			       zip_directory_cache) < 0)
1183		return;
1184}
1185