1/*
2 * mergesort() implementation for systems that don't have it.
3 *
4 * Copyright (c) 1992, 1993
5 *      The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Peter McIlroy.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34#include "util.h"
35
36#if defined(LIBC_SCCS) && !defined(lint)
37static char sccsid[] = "@(#)merge.c     8.2 (Berkeley) 2/14/94";
38#endif /* LIBC_SCCS and not lint */
39
40#ifdef HAVE_MERGESORT
41#undef yasm__mergesort
42#endif
43
44#ifndef HAVE_MERGESORT
45/*
46 * Hybrid exponential search/linear search merge sort with hybrid
47 * natural/pairwise first pass.  Requires about .3% more comparisons
48 * for random data than LSMS with pairwise first pass alone.
49 * It works for objects as small as two bytes.
50 */
51
52#define NATURAL
53#define THRESHOLD 16    /* Best choice for natural merge cut-off. */
54
55/* #define NATURAL to get hybrid natural merge.
56 * (The default is pairwise merging.)
57 */
58
59#include <errno.h>
60#include <string.h>
61
62static void setup(unsigned char *, unsigned char *, size_t, size_t,
63                  int (*)(const void *, const void *));
64static void insertionsort(unsigned char *, size_t, size_t,
65                          int (*)(const void *, const void *));
66
67#define ISIZE sizeof(int)
68#define PSIZE sizeof(unsigned char *)
69#define ICOPY_LIST(src, dst, last)                              \
70        do                                                      \
71        *(int*)dst = *(int*)src, src += ISIZE, dst += ISIZE;    \
72        while(src < last)
73#define ICOPY_ELT(src, dst, i)                                  \
74        do                                                      \
75        *(int*) dst = *(int*) src, src += ISIZE, dst += ISIZE;  \
76        while (i -= ISIZE)
77
78#define CCOPY_LIST(src, dst, last)              \
79        do                                      \
80                *dst++ = *src++;                \
81        while (src < last)
82#define CCOPY_ELT(src, dst, i)                  \
83        do                                      \
84                *dst++ = *src++;                \
85        while (i -= 1)
86
87/*
88 * Find the next possible pointer head.  (Trickery for forcing an array
89 * to do double duty as a linked list when objects do not align with word
90 * boundaries.
91 */
92/* Assumption: PSIZE is a power of 2. */
93#define EVAL(p) (unsigned char **)                                              \
94        ((unsigned char *)0 +                                                   \
95            (((unsigned char *)p + PSIZE - 1 - (unsigned char *) 0) & ~(PSIZE - 1)))
96#endif  /*HAVE_MERGESORT*/
97
98/*
99 * Arguments are as for qsort.
100 */
101int
102yasm__mergesort(void *base, size_t nmemb, size_t size,
103                int (*cmp)(const void *, const void *))
104{
105#ifdef HAVE_MERGESORT
106    return mergesort(base, nmemb, size, cmp);
107#else
108        size_t i;
109        int sense;
110        int big, iflag;
111        unsigned char *f1, *f2, *t, *b, *tp2, *q, *l1, *l2;
112        unsigned char *list2, *list1, *p2, *p, *last, **p1;
113
114        if (size < PSIZE / 2) {         /* Pointers must fit into 2 * size. */
115#ifdef EINVAL
116                errno = EINVAL;
117#endif
118                return (-1);
119        }
120
121        if (nmemb == 0)
122                return (0);
123
124        /*
125         * XXX
126         * Stupid subtraction for the Cray.
127         */
128        iflag = 0;
129        if (!(size % ISIZE) && !(((char *)base - (char *)0) % ISIZE))
130                iflag = 1;
131
132        if ((list2 = yasm_xmalloc(nmemb * size + PSIZE)) == NULL)
133                return (-1);
134
135        list1 = base;
136        setup(list1, list2, nmemb, size, cmp);
137        last = list2 + nmemb * size;
138        i = 0;
139        big = 0;
140        while (*EVAL(list2) != last) {
141            l2 = list1;
142            p1 = EVAL(list1);
143            for (tp2 = p2 = list2; p2 != last; p1 = EVAL(l2)) {
144                p2 = *EVAL(p2);
145                f1 = l2;
146                f2 = l1 = list1 + (p2 - list2);
147                if (p2 != last)
148                        p2 = *EVAL(p2);
149                l2 = list1 + (p2 - list2);
150                while (f1 < l1 && f2 < l2) {
151                        if ((*cmp)(f1, f2) <= 0) {
152                                q = f2;
153                                b = f1, t = l1;
154                                sense = -1;
155                        } else {
156                                q = f1;
157                                b = f2, t = l2;
158                                sense = 0;
159                        }
160                        if (!big) {     /* here i = 0 */
161                                while ((b += size) < t && cmp(q, b) >sense)
162                                        if (++i == 6) {
163                                                big = 1;
164                                                goto EXPONENTIAL;
165                                        }
166                        } else {
167EXPONENTIAL:                    for (i = size; ; i <<= 1)
168                                        if ((p = (b + i)) >= t) {
169                                                if ((p = t - size) > b &&
170                                                    (*cmp)(q, p) <= sense)
171                                                        t = p;
172                                                else
173                                                        b = p;
174                                                break;
175                                        } else if ((*cmp)(q, p) <= sense) {
176                                                t = p;
177                                                if (i == size)
178                                                        big = 0;
179                                                goto FASTCASE;
180                                        } else
181                                                b = p;
182                                while (t > b+size) {
183                                        i = (((t - b) / size) >> 1) * size;
184                                        if ((*cmp)(q, p = b + i) <= sense)
185                                                t = p;
186                                        else
187                                                b = p;
188                                }
189                                goto COPY;
190FASTCASE:                       while (i > size)
191                                        if ((*cmp)(q,
192                                                p = b + (i >>= 1)) <= sense)
193                                                t = p;
194                                        else
195                                                b = p;
196COPY:                           b = t;
197                        }
198                        i = size;
199                        if (q == f1) {
200                                if (iflag) {
201                                        ICOPY_LIST(f2, tp2, b);
202                                        ICOPY_ELT(f1, tp2, i);
203                                } else {
204                                        CCOPY_LIST(f2, tp2, b);
205                                        CCOPY_ELT(f1, tp2, i);
206                                }
207                        } else {
208                                if (iflag) {
209                                        ICOPY_LIST(f1, tp2, b);
210                                        ICOPY_ELT(f2, tp2, i);
211                                } else {
212                                        CCOPY_LIST(f1, tp2, b);
213                                        CCOPY_ELT(f2, tp2, i);
214                                }
215                        }
216                }
217                if (f2 < l2) {
218                        if (iflag)
219                                ICOPY_LIST(f2, tp2, l2);
220                        else
221                                CCOPY_LIST(f2, tp2, l2);
222                } else if (f1 < l1) {
223                        if (iflag)
224                                ICOPY_LIST(f1, tp2, l1);
225                        else
226                                CCOPY_LIST(f1, tp2, l1);
227                }
228                *p1 = l2;
229            }
230            tp2 = list1;        /* swap list1, list2 */
231            list1 = list2;
232            list2 = tp2;
233            last = list2 + nmemb*size;
234        }
235        if (base == list2) {
236                memmove(list2, list1, nmemb*size);
237                list2 = list1;
238        }
239        yasm_xfree(list2);
240        return (0);
241#endif  /*HAVE_MERGESORT*/
242}
243
244#ifndef HAVE_MERGESORT
245
246#define swap(a, b) {                                    \
247                s = b;                                  \
248                i = size;                               \
249                do {                                    \
250                        tmp = *a; *a++ = *s; *s++ = tmp; \
251                } while (--i);                          \
252                a -= size;                              \
253        }
254#define reverse(bot, top) {                             \
255        s = top;                                        \
256        do {                                            \
257                i = size;                               \
258                do {                                    \
259                        tmp = *bot; *bot++ = *s; *s++ = tmp; \
260                } while (--i);                          \
261                s -= size2;                             \
262        } while(bot < s);                               \
263}
264
265/*
266 * Optional hybrid natural/pairwise first pass.  Eats up list1 in runs of
267 * increasing order, list2 in a corresponding linked list.  Checks for runs
268 * when THRESHOLD/2 pairs compare with same sense.  (Only used when NATURAL
269 * is defined.  Otherwise simple pairwise merging is used.)
270 */
271void
272setup(unsigned char *list1, unsigned char *list2, size_t n, size_t size,
273      int (*cmp)(const void *, const void *))
274{
275        size_t i;
276        unsigned int tmp;
277        int length, sense;
278        size_t size2;
279        unsigned char *f1, *f2, *s, *l2, *last, *p2;
280
281        size2 = size*2;
282        if (n <= 5) {
283                insertionsort(list1, n, size, cmp);
284                *EVAL(list2) = (unsigned char*) list2 + n*size;
285                return;
286        }
287        /*
288         * Avoid running pointers out of bounds; limit n to evens
289         * for simplicity.
290         */
291        i = 4 + (n & 1);
292        insertionsort(list1 + (n - i) * size, i, size, cmp);
293        last = list1 + size * (n - i);
294        *EVAL(list2 + (last - list1)) = list2 + n * size;
295
296#ifdef NATURAL
297        p2 = list2;
298        f1 = list1;
299        sense = (cmp(f1, f1 + size) > 0);
300        for (; f1 < last; sense = !sense) {
301                length = 2;
302                                        /* Find pairs with same sense. */
303                for (f2 = f1 + size2; f2 < last; f2 += size2) {
304                        if ((cmp(f2, f2+ size) > 0) != sense)
305                                break;
306                        length += 2;
307                }
308                if (length < THRESHOLD) {               /* Pairwise merge */
309                        do {
310                                p2 = *EVAL(p2) = f1 + size2 - list1 + list2;
311                                if (sense > 0)
312                                        swap (f1, f1 + size);
313                        } while ((f1 += size2) < f2);
314                } else {                                /* Natural merge */
315                        l2 = f2;
316                        for (f2 = f1 + size2; f2 < l2; f2 += size2) {
317                                if ((cmp(f2-size, f2) > 0) != sense) {
318                                        p2 = *EVAL(p2) = f2 - list1 + list2;
319                                        if (sense > 0)
320                                                reverse(f1, f2-size);
321                                        f1 = f2;
322                                }
323                        }
324                        if (sense > 0)
325                                reverse (f1, f2-size);
326                        f1 = f2;
327                        if (f2 < last || cmp(f2 - size, f2) > 0)
328                                p2 = *EVAL(p2) = f2 - list1 + list2;
329                        else
330                                p2 = *EVAL(p2) = list2 + n*size;
331                }
332        }
333#else           /* pairwise merge only. */
334        for (f1 = list1, p2 = list2; f1 < last; f1 += size2) {
335                p2 = *EVAL(p2) = p2 + size2;
336                if (cmp (f1, f1 + size) > 0)
337                        swap(f1, f1 + size);
338        }
339#endif /* NATURAL */
340}
341
342/*
343 * This is to avoid out-of-bounds addresses in sorting the
344 * last 4 elements.
345 */
346static void
347insertionsort(unsigned char *a, size_t n, size_t size,
348              int (*cmp)(const void *, const void *))
349{
350        unsigned char *ai, *s, *t, *u, tmp;
351        size_t i;
352
353        for (ai = a+size; --n >= 1; ai += size)
354                for (t = ai; t > a; t -= size) {
355                        u = t - size;
356                        if (cmp(u, t) <= 0)
357                                break;
358                        swap(u, t);
359                }
360}
361#endif  /*HAVE_MERGESORT*/
362