1/* qsort.c 2 * (c) 1998 Gareth McCaughan 3 * 4 * This is a drop-in replacement for the C library's |qsort()| routine. 5 * 6 * Features: 7 * - Median-of-three pivoting (and more) 8 * - Truncation and final polishing by a single insertion sort 9 * - Early truncation when no swaps needed in pivoting step 10 * - Explicit recursion, guaranteed not to overflow 11 * - A few little wrinkles stolen from the GNU |qsort()|. 12 * - separate code for non-aligned / aligned / word-size objects 13 * 14 * This code may be reproduced freely provided 15 * - this file is retained unaltered apart from minor 16 * changes for portability and efficiency 17 * - no changes are made to this comment 18 * - any changes that *are* made are clearly flagged 19 * - the _ID string below is altered by inserting, after 20 * the date, the string " altered" followed at your option 21 * by other material. (Exceptions: you may change the name 22 * of the exported routine without changing the ID string. 23 * You may change the values of the macros TRUNC_* and 24 * PIVOT_THRESHOLD without changing the ID string, provided 25 * they remain constants with TRUNC_nonaligned, TRUNC_aligned 26 * and TRUNC_words/WORD_BYTES between 8 and 24, and 27 * PIVOT_THRESHOLD between 32 and 200.) 28 * 29 * You may use it in anything you like; you may make money 30 * out of it; you may distribute it in object form or as 31 * part of an executable without including source code; 32 * you don't have to credit me. (But it would be nice if 33 * you did.) 34 * 35 * If you find problems with this code, or find ways of 36 * making it significantly faster, please let me know! 37 * My e-mail address, valid as of early 1998 and certainly 38 * OK for at least the next 18 months, is 39 * gjm11@dpmms.cam.ac.uk 40 * Thanks! 41 * 42 * Gareth McCaughan Peterhouse Cambridge 1998 43 */ 44#include "SDL_config.h" 45 46/* 47#include <assert.h> 48#include <stdlib.h> 49#include <string.h> 50*/ 51#include "SDL_stdinc.h" 52 53#ifdef assert 54#undef assert 55#endif 56#define assert(X) 57#ifdef malloc 58#undef malloc 59#endif 60#define malloc SDL_malloc 61#ifdef free 62#undef free 63#endif 64#define free SDL_free 65#ifdef memcpy 66#undef memcpy 67#endif 68#define memcpy SDL_memcpy 69#ifdef memmove 70#undef memmove 71#endif 72#define memmove SDL_memmove 73#ifdef qsort 74#undef qsort 75#endif 76#define qsort SDL_qsort 77 78 79#ifndef HAVE_QSORT 80 81static char _ID[]="<qsort.c gjm 1.12 1998-03-19>"; 82 83/* How many bytes are there per word? (Must be a power of 2, 84 * and must in fact equal sizeof(int).) 85 */ 86#define WORD_BYTES sizeof(int) 87 88/* How big does our stack need to be? Answer: one entry per 89 * bit in a |size_t|. 90 */ 91#define STACK_SIZE (8*sizeof(size_t)) 92 93/* Different situations have slightly different requirements, 94 * and we make life epsilon easier by using different truncation 95 * points for the three different cases. 96 * So far, I have tuned TRUNC_words and guessed that the same 97 * value might work well for the other two cases. Of course 98 * what works well on my machine might work badly on yours. 99 */ 100#define TRUNC_nonaligned 12 101#define TRUNC_aligned 12 102#define TRUNC_words 12*WORD_BYTES /* nb different meaning */ 103 104/* We use a simple pivoting algorithm for shortish sub-arrays 105 * and a more complicated one for larger ones. The threshold 106 * is PIVOT_THRESHOLD. 107 */ 108#define PIVOT_THRESHOLD 40 109 110typedef struct { char * first; char * last; } stack_entry; 111#define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;} 112#define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;} 113#define doLeft {first=ffirst;llast=last;continue;} 114#define doRight {ffirst=first;last=llast;continue;} 115#define pop {if (--stacktop<0) break;\ 116 first=ffirst=stack[stacktop].first;\ 117 last=llast=stack[stacktop].last;\ 118 continue;} 119 120/* Some comments on the implementation. 121 * 1. When we finish partitioning the array into "low" 122 * and "high", we forget entirely about short subarrays, 123 * because they'll be done later by insertion sort. 124 * Doing lots of little insertion sorts might be a win 125 * on large datasets for locality-of-reference reasons, 126 * but it makes the code much nastier and increases 127 * bookkeeping overhead. 128 * 2. We always save the shorter and get to work on the 129 * longer. This guarantees that every time we push 130 * an item onto the stack its size is <= 1/2 of that 131 * of its parent; so the stack can't need more than 132 * log_2(max-array-size) entries. 133 * 3. We choose a pivot by looking at the first, last 134 * and middle elements. We arrange them into order 135 * because it's easy to do that in conjunction with 136 * choosing the pivot, and it makes things a little 137 * easier in the partitioning step. Anyway, the pivot 138 * is the middle of these three. It's still possible 139 * to construct datasets where the algorithm takes 140 * time of order n^2, but it simply never happens in 141 * practice. 142 * 3' Newsflash: On further investigation I find that 143 * it's easy to construct datasets where median-of-3 144 * simply isn't good enough. So on large-ish subarrays 145 * we do a more sophisticated pivoting: we take three 146 * sets of 3 elements, find their medians, and then 147 * take the median of those. 148 * 4. We copy the pivot element to a separate place 149 * because that way we can always do our comparisons 150 * directly against a pointer to that separate place, 151 * and don't have to wonder "did we move the pivot 152 * element?". This makes the inner loop better. 153 * 5. It's possible to make the pivoting even more 154 * reliable by looking at more candidates when n 155 * is larger. (Taking this to its logical conclusion 156 * results in a variant of quicksort that doesn't 157 * have that n^2 worst case.) However, the overhead 158 * from the extra bookkeeping means that it's just 159 * not worth while. 160 * 6. This is pretty clean and portable code. Here are 161 * all the potential portability pitfalls and problems 162 * I know of: 163 * - In one place (the insertion sort) I construct 164 * a pointer that points just past the end of the 165 * supplied array, and assume that (a) it won't 166 * compare equal to any pointer within the array, 167 * and (b) it will compare equal to a pointer 168 * obtained by stepping off the end of the array. 169 * These might fail on some segmented architectures. 170 * - I assume that there are 8 bits in a |char| when 171 * computing the size of stack needed. This would 172 * fail on machines with 9-bit or 16-bit bytes. 173 * - I assume that if |((int)base&(sizeof(int)-1))==0| 174 * and |(size&(sizeof(int)-1))==0| then it's safe to 175 * get at array elements via |int*|s, and that if 176 * actually |size==sizeof(int)| as well then it's 177 * safe to treat the elements as |int|s. This might 178 * fail on systems that convert pointers to integers 179 * in non-standard ways. 180 * - I assume that |8*sizeof(size_t)<=INT_MAX|. This 181 * would be false on a machine with 8-bit |char|s, 182 * 16-bit |int|s and 4096-bit |size_t|s. :-) 183 */ 184 185/* The recursion logic is the same in each case: */ 186#define Recurse(Trunc) \ 187 { size_t l=last-ffirst,r=llast-first; \ 188 if (l<Trunc) { \ 189 if (r>=Trunc) doRight \ 190 else pop \ 191 } \ 192 else if (l<=r) { pushLeft; doRight } \ 193 else if (r>=Trunc) { pushRight; doLeft }\ 194 else doLeft \ 195 } 196 197/* and so is the pivoting logic: */ 198#define Pivot(swapper,sz) \ 199 if ((size_t)(last-first)>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\ 200 else { \ 201 if (compare(first,mid)<0) { \ 202 if (compare(mid,last)>0) { \ 203 swapper(mid,last); \ 204 if (compare(first,mid)>0) swapper(first,mid);\ 205 } \ 206 } \ 207 else { \ 208 if (compare(mid,last)>0) swapper(first,last)\ 209 else { \ 210 swapper(first,mid); \ 211 if (compare(mid,last)>0) swapper(mid,last);\ 212 } \ 213 } \ 214 first+=sz; last-=sz; \ 215 } 216 217#ifdef DEBUG_QSORT 218#include <stdio.h> 219#endif 220 221/* and so is the partitioning logic: */ 222#define Partition(swapper,sz) { \ 223 int swapped=0; \ 224 do { \ 225 while (compare(first,pivot)<0) first+=sz; \ 226 while (compare(pivot,last)<0) last-=sz; \ 227 if (first<last) { \ 228 swapper(first,last); swapped=1; \ 229 first+=sz; last-=sz; } \ 230 else if (first==last) { first+=sz; last-=sz; break; }\ 231 } while (first<=last); \ 232 if (!swapped) pop \ 233} 234 235/* and so is the pre-insertion-sort operation of putting 236 * the smallest element into place as a sentinel. 237 * Doing this makes the inner loop nicer. I got this 238 * idea from the GNU implementation of qsort(). 239 */ 240#define PreInsertion(swapper,limit,sz) \ 241 first=base; \ 242 last=first + (nmemb>limit ? limit : nmemb-1)*sz;\ 243 while (last!=base) { \ 244 if (compare(first,last)>0) first=last; \ 245 last-=sz; } \ 246 if (first!=base) swapper(first,(char*)base); 247 248/* and so is the insertion sort, in the first two cases: */ 249#define Insertion(swapper) \ 250 last=((char*)base)+nmemb*size; \ 251 for (first=((char*)base)+size;first!=last;first+=size) { \ 252 char *test; \ 253 /* Find the right place for |first|. \ 254 * My apologies for var reuse. */ \ 255 for (test=first-size;compare(test,first)>0;test-=size) ; \ 256 test+=size; \ 257 if (test!=first) { \ 258 /* Shift everything in [test,first) \ 259 * up by one, and place |first| \ 260 * where |test| is. */ \ 261 memcpy(pivot,first,size); \ 262 memmove(test+size,test,first-test); \ 263 memcpy(test,pivot,size); \ 264 } \ 265 } 266 267#define SWAP_nonaligned(a,b) { \ 268 register char *aa=(a),*bb=(b); \ 269 register size_t sz=size; \ 270 do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); } 271 272#define SWAP_aligned(a,b) { \ 273 register int *aa=(int*)(a),*bb=(int*)(b); \ 274 register size_t sz=size; \ 275 do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); } 276 277#define SWAP_words(a,b) { \ 278 register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; } 279 280/* ---------------------------------------------------------------------- */ 281 282static char * pivot_big(char *first, char *mid, char *last, size_t size, 283 int compare(const void *, const void *)) { 284 size_t d=(((last-first)/size)>>3)*size; 285 char *m1,*m2,*m3; 286 { char *a=first, *b=first+d, *c=first+2*d; 287#ifdef DEBUG_QSORT 288fprintf(stderr,"< %d %d %d\n",*(int*)a,*(int*)b,*(int*)c); 289#endif 290 m1 = compare(a,b)<0 ? 291 (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a)) 292 : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b)); 293 } 294 { char *a=mid-d, *b=mid, *c=mid+d; 295#ifdef DEBUG_QSORT 296fprintf(stderr,". %d %d %d\n",*(int*)a,*(int*)b,*(int*)c); 297#endif 298 m2 = compare(a,b)<0 ? 299 (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a)) 300 : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b)); 301 } 302 { char *a=last-2*d, *b=last-d, *c=last; 303#ifdef DEBUG_QSORT 304fprintf(stderr,"> %d %d %d\n",*(int*)a,*(int*)b,*(int*)c); 305#endif 306 m3 = compare(a,b)<0 ? 307 (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a)) 308 : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b)); 309 } 310#ifdef DEBUG_QSORT 311fprintf(stderr,"-> %d %d %d\n",*(int*)m1,*(int*)m2,*(int*)m3); 312#endif 313 return compare(m1,m2)<0 ? 314 (compare(m2,m3)<0 ? m2 : (compare(m1,m3)<0 ? m3 : m1)) 315 : (compare(m1,m3)<0 ? m1 : (compare(m2,m3)<0 ? m3 : m2)); 316} 317 318/* ---------------------------------------------------------------------- */ 319 320static void qsort_nonaligned(void *base, size_t nmemb, size_t size, 321 int (*compare)(const void *, const void *)) { 322 323 stack_entry stack[STACK_SIZE]; 324 int stacktop=0; 325 char *first,*last; 326 char *pivot=malloc(size); 327 size_t trunc=TRUNC_nonaligned*size; 328 assert(pivot!=0); 329 330 first=(char*)base; last=first+(nmemb-1)*size; 331 332 if ((size_t)(last-first)>trunc) { 333 char *ffirst=first, *llast=last; 334 while (1) { 335 /* Select pivot */ 336 { char * mid=first+size*((last-first)/size >> 1); 337 Pivot(SWAP_nonaligned,size); 338 memcpy(pivot,mid,size); 339 } 340 /* Partition. */ 341 Partition(SWAP_nonaligned,size); 342 /* Prepare to recurse/iterate. */ 343 Recurse(trunc) 344 } 345 } 346 PreInsertion(SWAP_nonaligned,TRUNC_nonaligned,size); 347 Insertion(SWAP_nonaligned); 348 free(pivot); 349} 350 351static void qsort_aligned(void *base, size_t nmemb, size_t size, 352 int (*compare)(const void *, const void *)) { 353 354 stack_entry stack[STACK_SIZE]; 355 int stacktop=0; 356 char *first,*last; 357 char *pivot=malloc(size); 358 size_t trunc=TRUNC_aligned*size; 359 assert(pivot!=0); 360 361 first=(char*)base; last=first+(nmemb-1)*size; 362 363 if ((size_t)(last-first)>trunc) { 364 char *ffirst=first,*llast=last; 365 while (1) { 366 /* Select pivot */ 367 { char * mid=first+size*((last-first)/size >> 1); 368 Pivot(SWAP_aligned,size); 369 memcpy(pivot,mid,size); 370 } 371 /* Partition. */ 372 Partition(SWAP_aligned,size); 373 /* Prepare to recurse/iterate. */ 374 Recurse(trunc) 375 } 376 } 377 PreInsertion(SWAP_aligned,TRUNC_aligned,size); 378 Insertion(SWAP_aligned); 379 free(pivot); 380} 381 382static void qsort_words(void *base, size_t nmemb, 383 int (*compare)(const void *, const void *)) { 384 385 stack_entry stack[STACK_SIZE]; 386 int stacktop=0; 387 char *first,*last; 388 char *pivot=malloc(WORD_BYTES); 389 assert(pivot!=0); 390 391 first=(char*)base; last=first+(nmemb-1)*WORD_BYTES; 392 393 if (last-first>TRUNC_words) { 394 char *ffirst=first, *llast=last; 395 while (1) { 396#ifdef DEBUG_QSORT 397fprintf(stderr,"Doing %d:%d: ", 398 (first-(char*)base)/WORD_BYTES, 399 (last-(char*)base)/WORD_BYTES); 400#endif 401 /* Select pivot */ 402 { char * mid=first+WORD_BYTES*((last-first) / (2*WORD_BYTES)); 403 Pivot(SWAP_words,WORD_BYTES); 404 *(int*)pivot=*(int*)mid; 405 } 406#ifdef DEBUG_QSORT 407fprintf(stderr,"pivot=%d\n",*(int*)pivot); 408#endif 409 /* Partition. */ 410 Partition(SWAP_words,WORD_BYTES); 411 /* Prepare to recurse/iterate. */ 412 Recurse(TRUNC_words) 413 } 414 } 415 PreInsertion(SWAP_words,(TRUNC_words/WORD_BYTES),WORD_BYTES); 416 /* Now do insertion sort. */ 417 last=((char*)base)+nmemb*WORD_BYTES; 418 for (first=((char*)base)+WORD_BYTES;first!=last;first+=WORD_BYTES) { 419 /* Find the right place for |first|. My apologies for var reuse */ 420 int *pl=(int*)(first-WORD_BYTES),*pr=(int*)first; 421 *(int*)pivot=*(int*)first; 422 for (;compare(pl,pivot)>0;pr=pl,--pl) { 423 *pr=*pl; } 424 if (pr!=(int*)first) *pr=*(int*)pivot; 425 } 426 free(pivot); 427} 428 429/* ---------------------------------------------------------------------- */ 430 431void qsort(void *base, size_t nmemb, size_t size, 432 int (*compare)(const void *, const void *)) { 433 434 if (nmemb<=1) return; 435 if (((uintptr_t)base|size)&(WORD_BYTES-1)) 436 qsort_nonaligned(base,nmemb,size,compare); 437 else if (size!=WORD_BYTES) 438 qsort_aligned(base,nmemb,size,compare); 439 else 440 qsort_words(base,nmemb,compare); 441} 442 443#endif /* !HAVE_QSORT */ 444