_heapqmodule.c revision ed6254acf289839467a77419566b475f5950f475
1/* Drop in replacement for heapq.py
2
3C implementation derived directly from heapq.py in Py2.3
4which was written by Kevin O'Connor, augmented by Tim Peters,
5annotated by Fran�ois Pinard, and converted to C by Raymond Hettinger.
6
7*/
8
9#include "Python.h"
10
11static int
12_siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
13{
14	PyObject *newitem, *parent;
15	int cmp;
16	Py_ssize_t parentpos;
17
18	assert(PyList_Check(heap));
19	if (pos >= PyList_GET_SIZE(heap)) {
20		PyErr_SetString(PyExc_IndexError, "index out of range");
21		return -1;
22	}
23
24	newitem = PyList_GET_ITEM(heap, pos);
25	Py_INCREF(newitem);
26	/* Follow the path to the root, moving parents down until finding
27	   a place newitem fits. */
28	while (pos > startpos){
29		parentpos = (pos - 1) >> 1;
30		parent = PyList_GET_ITEM(heap, parentpos);
31		cmp = PyObject_RichCompareBool(parent, newitem, Py_LE);
32		if (cmp == -1) {
33			Py_DECREF(newitem);
34			return -1;
35		}
36		if (cmp == 1)
37			break;
38		Py_INCREF(parent);
39		Py_DECREF(PyList_GET_ITEM(heap, pos));
40		PyList_SET_ITEM(heap, pos, parent);
41		pos = parentpos;
42	}
43	Py_DECREF(PyList_GET_ITEM(heap, pos));
44	PyList_SET_ITEM(heap, pos, newitem);
45	return 0;
46}
47
48static int
49_siftup(PyListObject *heap, Py_ssize_t pos)
50{
51	Py_ssize_t startpos, endpos, childpos, rightpos;
52	int cmp;
53	PyObject *newitem, *tmp;
54
55	assert(PyList_Check(heap));
56	endpos = PyList_GET_SIZE(heap);
57	startpos = pos;
58	if (pos >= endpos) {
59		PyErr_SetString(PyExc_IndexError, "index out of range");
60		return -1;
61	}
62	newitem = PyList_GET_ITEM(heap, pos);
63	Py_INCREF(newitem);
64
65	/* Bubble up the smaller child until hitting a leaf. */
66	childpos = 2*pos + 1;    /* leftmost child position  */
67	while (childpos < endpos) {
68		/* Set childpos to index of smaller child.   */
69		rightpos = childpos + 1;
70		if (rightpos < endpos) {
71			cmp = PyObject_RichCompareBool(
72				PyList_GET_ITEM(heap, rightpos),
73				PyList_GET_ITEM(heap, childpos),
74				Py_LE);
75			if (cmp == -1) {
76				Py_DECREF(newitem);
77				return -1;
78			}
79			if (cmp == 1)
80				childpos = rightpos;
81		}
82		/* Move the smaller child up. */
83		tmp = PyList_GET_ITEM(heap, childpos);
84		Py_INCREF(tmp);
85		Py_DECREF(PyList_GET_ITEM(heap, pos));
86		PyList_SET_ITEM(heap, pos, tmp);
87		pos = childpos;
88		childpos = 2*pos + 1;
89	}
90
91	/* The leaf at pos is empty now.  Put newitem there, and and bubble
92	   it up to its final resting place (by sifting its parents down). */
93	Py_DECREF(PyList_GET_ITEM(heap, pos));
94	PyList_SET_ITEM(heap, pos, newitem);
95	return _siftdown(heap, startpos, pos);
96}
97
98static PyObject *
99heappush(PyObject *self, PyObject *args)
100{
101	PyObject *heap, *item;
102
103	if (!PyArg_UnpackTuple(args, "heappush", 2, 2, &heap, &item))
104		return NULL;
105
106	if (!PyList_Check(heap)) {
107		PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
108		return NULL;
109	}
110
111	if (PyList_Append(heap, item) == -1)
112		return NULL;
113
114	if (_siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1) == -1)
115		return NULL;
116	Py_INCREF(Py_None);
117	return Py_None;
118}
119
120PyDoc_STRVAR(heappush_doc,
121"Push item onto heap, maintaining the heap invariant.");
122
123static PyObject *
124heappop(PyObject *self, PyObject *heap)
125{
126	PyObject *lastelt, *returnitem;
127	Py_ssize_t n;
128
129	if (!PyList_Check(heap)) {
130		PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
131		return NULL;
132	}
133
134	/* # raises appropriate IndexError if heap is empty */
135	n = PyList_GET_SIZE(heap);
136	if (n == 0) {
137		PyErr_SetString(PyExc_IndexError, "index out of range");
138		return NULL;
139	}
140
141	lastelt = PyList_GET_ITEM(heap, n-1) ;
142	Py_INCREF(lastelt);
143	PyList_SetSlice(heap, n-1, n, NULL);
144	n--;
145
146	if (!n)
147		return lastelt;
148	returnitem = PyList_GET_ITEM(heap, 0);
149	PyList_SET_ITEM(heap, 0, lastelt);
150	if (_siftup((PyListObject *)heap, 0) == -1) {
151		Py_DECREF(returnitem);
152		return NULL;
153	}
154	return returnitem;
155}
156
157PyDoc_STRVAR(heappop_doc,
158"Pop the smallest item off the heap, maintaining the heap invariant.");
159
160static PyObject *
161heapreplace(PyObject *self, PyObject *args)
162{
163	PyObject *heap, *item, *returnitem;
164
165	if (!PyArg_UnpackTuple(args, "heapreplace", 2, 2, &heap, &item))
166		return NULL;
167
168	if (!PyList_Check(heap)) {
169		PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
170		return NULL;
171	}
172
173	if (PyList_GET_SIZE(heap) < 1) {
174		PyErr_SetString(PyExc_IndexError, "index out of range");
175		return NULL;
176	}
177
178	returnitem = PyList_GET_ITEM(heap, 0);
179	Py_INCREF(item);
180	PyList_SET_ITEM(heap, 0, item);
181	if (_siftup((PyListObject *)heap, 0) == -1) {
182		Py_DECREF(returnitem);
183		return NULL;
184	}
185	return returnitem;
186}
187
188PyDoc_STRVAR(heapreplace_doc,
189"Pop and return the current smallest value, and add the new item.\n\
190\n\
191This is more efficient than heappop() followed by heappush(), and can be\n\
192more appropriate when using a fixed-size heap.  Note that the value\n\
193returned may be larger than item!  That constrains reasonable uses of\n\
194this routine unless written as part of a conditional replacement:\n\n\
195        if item > heap[0]:\n\
196            item = heapreplace(heap, item)\n");
197
198static PyObject *
199heapify(PyObject *self, PyObject *heap)
200{
201	Py_ssize_t i, n;
202
203	if (!PyList_Check(heap)) {
204		PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
205		return NULL;
206	}
207
208	n = PyList_GET_SIZE(heap);
209	/* Transform bottom-up.  The largest index there's any point to
210	   looking at is the largest with a child index in-range, so must
211	   have 2*i + 1 < n, or i < (n-1)/2.  If n is even = 2*j, this is
212	   (2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1.  If
213	   n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest,
214	   and that's again n//2-1.
215	*/
216	for (i=n/2-1 ; i>=0 ; i--)
217		if(_siftup((PyListObject *)heap, i) == -1)
218			return NULL;
219	Py_INCREF(Py_None);
220	return Py_None;
221}
222
223PyDoc_STRVAR(heapify_doc,
224"Transform list into a heap, in-place, in O(len(heap)) time.");
225
226static PyObject *
227nlargest(PyObject *self, PyObject *args)
228{
229	PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem;
230	int i, n;
231
232	if (!PyArg_ParseTuple(args, "iO:nlargest", &n, &iterable))
233		return NULL;
234
235	it = PyObject_GetIter(iterable);
236	if (it == NULL)
237		return NULL;
238
239	heap = PyList_New(0);
240	if (heap == NULL)
241		goto fail;
242
243	for (i=0 ; i<n ; i++ ){
244		elem = PyIter_Next(it);
245		if (elem == NULL) {
246			if (PyErr_Occurred())
247				goto fail;
248			else
249				goto sortit;
250		}
251		if (PyList_Append(heap, elem) == -1) {
252			Py_DECREF(elem);
253			goto fail;
254		}
255		Py_DECREF(elem);
256	}
257	if (PyList_GET_SIZE(heap) == 0)
258		goto sortit;
259
260	for (i=n/2-1 ; i>=0 ; i--)
261		if(_siftup((PyListObject *)heap, i) == -1)
262			goto fail;
263
264	sol = PyList_GET_ITEM(heap, 0);
265	while (1) {
266		elem = PyIter_Next(it);
267		if (elem == NULL) {
268			if (PyErr_Occurred())
269				goto fail;
270			else
271				goto sortit;
272		}
273		if (PyObject_RichCompareBool(elem, sol, Py_LE)) {
274			Py_DECREF(elem);
275			continue;
276		}
277		oldelem = PyList_GET_ITEM(heap, 0);
278		PyList_SET_ITEM(heap, 0, elem);
279		Py_DECREF(oldelem);
280		if (_siftup((PyListObject *)heap, 0) == -1)
281			goto fail;
282		sol = PyList_GET_ITEM(heap, 0);
283	}
284sortit:
285	if (PyList_Sort(heap) == -1)
286		goto fail;
287	if (PyList_Reverse(heap) == -1)
288		goto fail;
289	Py_DECREF(it);
290	return heap;
291
292fail:
293	Py_DECREF(it);
294	Py_XDECREF(heap);
295	return NULL;
296}
297
298PyDoc_STRVAR(nlargest_doc,
299"Find the n largest elements in a dataset.\n\
300\n\
301Equivalent to:  sorted(iterable, reverse=True)[:n]\n");
302
303static int
304_siftdownmax(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
305{
306	PyObject *newitem, *parent;
307	int cmp;
308	Py_ssize_t parentpos;
309
310	assert(PyList_Check(heap));
311	if (pos >= PyList_GET_SIZE(heap)) {
312		PyErr_SetString(PyExc_IndexError, "index out of range");
313		return -1;
314	}
315
316	newitem = PyList_GET_ITEM(heap, pos);
317	Py_INCREF(newitem);
318	/* Follow the path to the root, moving parents down until finding
319	   a place newitem fits. */
320	while (pos > startpos){
321		parentpos = (pos - 1) >> 1;
322		parent = PyList_GET_ITEM(heap, parentpos);
323		cmp = PyObject_RichCompareBool(newitem, parent, Py_LE);
324		if (cmp == -1) {
325			Py_DECREF(newitem);
326			return -1;
327		}
328		if (cmp == 1)
329			break;
330		Py_INCREF(parent);
331		Py_DECREF(PyList_GET_ITEM(heap, pos));
332		PyList_SET_ITEM(heap, pos, parent);
333		pos = parentpos;
334	}
335	Py_DECREF(PyList_GET_ITEM(heap, pos));
336	PyList_SET_ITEM(heap, pos, newitem);
337	return 0;
338}
339
340static int
341_siftupmax(PyListObject *heap, Py_ssize_t pos)
342{
343	Py_ssize_t startpos, endpos, childpos, rightpos;
344	int cmp;
345	PyObject *newitem, *tmp;
346
347	assert(PyList_Check(heap));
348	endpos = PyList_GET_SIZE(heap);
349	startpos = pos;
350	if (pos >= endpos) {
351		PyErr_SetString(PyExc_IndexError, "index out of range");
352		return -1;
353	}
354	newitem = PyList_GET_ITEM(heap, pos);
355	Py_INCREF(newitem);
356
357	/* Bubble up the smaller child until hitting a leaf. */
358	childpos = 2*pos + 1;    /* leftmost child position  */
359	while (childpos < endpos) {
360		/* Set childpos to index of smaller child.   */
361		rightpos = childpos + 1;
362		if (rightpos < endpos) {
363			cmp = PyObject_RichCompareBool(
364				PyList_GET_ITEM(heap, childpos),
365				PyList_GET_ITEM(heap, rightpos),
366				Py_LE);
367			if (cmp == -1) {
368				Py_DECREF(newitem);
369				return -1;
370			}
371			if (cmp == 1)
372				childpos = rightpos;
373		}
374		/* Move the smaller child up. */
375		tmp = PyList_GET_ITEM(heap, childpos);
376		Py_INCREF(tmp);
377		Py_DECREF(PyList_GET_ITEM(heap, pos));
378		PyList_SET_ITEM(heap, pos, tmp);
379		pos = childpos;
380		childpos = 2*pos + 1;
381	}
382
383	/* The leaf at pos is empty now.  Put newitem there, and and bubble
384	   it up to its final resting place (by sifting its parents down). */
385	Py_DECREF(PyList_GET_ITEM(heap, pos));
386	PyList_SET_ITEM(heap, pos, newitem);
387	return _siftdownmax(heap, startpos, pos);
388}
389
390static PyObject *
391nsmallest(PyObject *self, PyObject *args)
392{
393	PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem;
394	Py_ssize_t i, n;
395
396	if (!PyArg_ParseTuple(args, "nO:nsmallest", &n, &iterable))
397		return NULL;
398
399	it = PyObject_GetIter(iterable);
400	if (it == NULL)
401		return NULL;
402
403	heap = PyList_New(0);
404	if (heap == NULL)
405		goto fail;
406
407	for (i=0 ; i<n ; i++ ){
408		elem = PyIter_Next(it);
409		if (elem == NULL) {
410			if (PyErr_Occurred())
411				goto fail;
412			else
413				goto sortit;
414		}
415		if (PyList_Append(heap, elem) == -1) {
416			Py_DECREF(elem);
417			goto fail;
418		}
419		Py_DECREF(elem);
420	}
421	n = PyList_GET_SIZE(heap);
422	if (n == 0)
423		goto sortit;
424
425	for (i=n/2-1 ; i>=0 ; i--)
426		if(_siftupmax((PyListObject *)heap, i) == -1)
427			goto fail;
428
429	los = PyList_GET_ITEM(heap, 0);
430	while (1) {
431		elem = PyIter_Next(it);
432		if (elem == NULL) {
433			if (PyErr_Occurred())
434				goto fail;
435			else
436				goto sortit;
437		}
438		if (PyObject_RichCompareBool(los, elem, Py_LE)) {
439			Py_DECREF(elem);
440			continue;
441		}
442
443		oldelem = PyList_GET_ITEM(heap, 0);
444		PyList_SET_ITEM(heap, 0, elem);
445		Py_DECREF(oldelem);
446		if (_siftupmax((PyListObject *)heap, 0) == -1)
447			goto fail;
448		los = PyList_GET_ITEM(heap, 0);
449	}
450
451sortit:
452	if (PyList_Sort(heap) == -1)
453		goto fail;
454	Py_DECREF(it);
455	return heap;
456
457fail:
458	Py_DECREF(it);
459	Py_XDECREF(heap);
460	return NULL;
461}
462
463PyDoc_STRVAR(nsmallest_doc,
464"Find the n smallest elements in a dataset.\n\
465\n\
466Equivalent to:  sorted(iterable)[:n]\n");
467
468static PyMethodDef heapq_methods[] = {
469	{"heappush",	(PyCFunction)heappush,
470		METH_VARARGS,	heappush_doc},
471	{"heappop",	(PyCFunction)heappop,
472		METH_O,		heappop_doc},
473	{"heapreplace",	(PyCFunction)heapreplace,
474		METH_VARARGS,	heapreplace_doc},
475	{"heapify",	(PyCFunction)heapify,
476		METH_O,		heapify_doc},
477	{"nlargest",	(PyCFunction)nlargest,
478		METH_VARARGS,	nlargest_doc},
479	{"nsmallest",	(PyCFunction)nsmallest,
480		METH_VARARGS,	nsmallest_doc},
481	{NULL,		NULL}		/* sentinel */
482};
483
484PyDoc_STRVAR(module_doc,
485"Heap queue algorithm (a.k.a. priority queue).\n\
486\n\
487Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
488all k, counting elements from 0.  For the sake of comparison,\n\
489non-existing elements are considered to be infinite.  The interesting\n\
490property of a heap is that a[0] is always its smallest element.\n\
491\n\
492Usage:\n\
493\n\
494heap = []            # creates an empty heap\n\
495heappush(heap, item) # pushes a new item on the heap\n\
496item = heappop(heap) # pops the smallest item from the heap\n\
497item = heap[0]       # smallest item on the heap without popping it\n\
498heapify(x)           # transforms list into a heap, in-place, in linear time\n\
499item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\
500                               # new item; the heap size is unchanged\n\
501\n\
502Our API differs from textbook heap algorithms as follows:\n\
503\n\
504- We use 0-based indexing.  This makes the relationship between the\n\
505  index for a node and the indexes for its children slightly less\n\
506  obvious, but is more suitable since Python uses 0-based indexing.\n\
507\n\
508- Our heappop() method returns the smallest item, not the largest.\n\
509\n\
510These two make it possible to view the heap as a regular Python list\n\
511without surprises: heap[0] is the smallest item, and heap.sort()\n\
512maintains the heap invariant!\n");
513
514
515PyDoc_STRVAR(__about__,
516"Heap queues\n\
517\n\
518[explanation by Fran�ois Pinard]\n\
519\n\
520Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
521all k, counting elements from 0.  For the sake of comparison,\n\
522non-existing elements are considered to be infinite.  The interesting\n\
523property of a heap is that a[0] is always its smallest element.\n"
524"\n\
525The strange invariant above is meant to be an efficient memory\n\
526representation for a tournament.  The numbers below are `k', not a[k]:\n\
527\n\
528                                   0\n\
529\n\
530                  1                                 2\n\
531\n\
532          3               4                5               6\n\
533\n\
534      7       8       9       10      11      12      13      14\n\
535\n\
536    15 16   17 18   19 20   21 22   23 24   25 26   27 28   29 30\n\
537\n\
538\n\
539In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'.  In\n\
540an usual binary tournament we see in sports, each cell is the winner\n\
541over the two cells it tops, and we can trace the winner down the tree\n\
542to see all opponents s/he had.  However, in many computer applications\n\
543of such tournaments, we do not need to trace the history of a winner.\n\
544To be more memory efficient, when a winner is promoted, we try to\n\
545replace it by something else at a lower level, and the rule becomes\n\
546that a cell and the two cells it tops contain three different items,\n\
547but the top cell \"wins\" over the two topped cells.\n"
548"\n\
549If this heap invariant is protected at all time, index 0 is clearly\n\
550the overall winner.  The simplest algorithmic way to remove it and\n\
551find the \"next\" winner is to move some loser (let's say cell 30 in the\n\
552diagram above) into the 0 position, and then percolate this new 0 down\n\
553the tree, exchanging values, until the invariant is re-established.\n\
554This is clearly logarithmic on the total number of items in the tree.\n\
555By iterating over all items, you get an O(n ln n) sort.\n"
556"\n\
557A nice feature of this sort is that you can efficiently insert new\n\
558items while the sort is going on, provided that the inserted items are\n\
559not \"better\" than the last 0'th element you extracted.  This is\n\
560especially useful in simulation contexts, where the tree holds all\n\
561incoming events, and the \"win\" condition means the smallest scheduled\n\
562time.  When an event schedule other events for execution, they are\n\
563scheduled into the future, so they can easily go into the heap.  So, a\n\
564heap is a good structure for implementing schedulers (this is what I\n\
565used for my MIDI sequencer :-).\n"
566"\n\
567Various structures for implementing schedulers have been extensively\n\
568studied, and heaps are good for this, as they are reasonably speedy,\n\
569the speed is almost constant, and the worst case is not much different\n\
570than the average case.  However, there are other representations which\n\
571are more efficient overall, yet the worst cases might be terrible.\n"
572"\n\
573Heaps are also very useful in big disk sorts.  You most probably all\n\
574know that a big sort implies producing \"runs\" (which are pre-sorted\n\
575sequences, which size is usually related to the amount of CPU memory),\n\
576followed by a merging passes for these runs, which merging is often\n\
577very cleverly organised[1].  It is very important that the initial\n\
578sort produces the longest runs possible.  Tournaments are a good way\n\
579to that.  If, using all the memory available to hold a tournament, you\n\
580replace and percolate items that happen to fit the current run, you'll\n\
581produce runs which are twice the size of the memory for random input,\n\
582and much better for input fuzzily ordered.\n"
583"\n\
584Moreover, if you output the 0'th item on disk and get an input which\n\
585may not fit in the current tournament (because the value \"wins\" over\n\
586the last output value), it cannot fit in the heap, so the size of the\n\
587heap decreases.  The freed memory could be cleverly reused immediately\n\
588for progressively building a second heap, which grows at exactly the\n\
589same rate the first heap is melting.  When the first heap completely\n\
590vanishes, you switch heaps and start a new run.  Clever and quite\n\
591effective!\n\
592\n\
593In a word, heaps are useful memory structures to know.  I use them in\n\
594a few applications, and I think it is good to keep a `heap' module\n\
595around. :-)\n"
596"\n\
597--------------------\n\
598[1] The disk balancing algorithms which are current, nowadays, are\n\
599more annoying than clever, and this is a consequence of the seeking\n\
600capabilities of the disks.  On devices which cannot seek, like big\n\
601tape drives, the story was quite different, and one had to be very\n\
602clever to ensure (far in advance) that each tape movement will be the\n\
603most effective possible (that is, will best participate at\n\
604\"progressing\" the merge).  Some tapes were even able to read\n\
605backwards, and this was also used to avoid the rewinding time.\n\
606Believe me, real good tape sorts were quite spectacular to watch!\n\
607From all times, sorting has always been a Great Art! :-)\n");
608
609PyMODINIT_FUNC
610init_heapq(void)
611{
612	PyObject *m;
613
614	m = Py_InitModule3("_heapq", heapq_methods, module_doc);
615	if (m == NULL)
616    		return;
617	PyModule_AddObject(m, "__about__", PyString_FromString(__about__));
618}
619
620