1/* 2 * sssort.c for libdivsufsort 3 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person 6 * obtaining a copy of this software and associated documentation 7 * files (the "Software"), to deal in the Software without 8 * restriction, including without limitation the rights to use, 9 * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following 12 * conditions: 13 * 14 * The above copyright notice and this permission notice shall be 15 * included in all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 * OTHER DEALINGS IN THE SOFTWARE. 25 */ 26 27#include "divsufsort_private.h" 28 29 30/*- Private Functions -*/ 31 32static const saint_t lg_table[256]= { 33 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, 34 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 35 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, 36 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, 37 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 38 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 39 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 40 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 41}; 42 43#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) 44 45static INLINE 46saint_t 47ss_ilg(saidx_t n) { 48#if SS_BLOCKSIZE == 0 49# if defined(BUILD_DIVSUFSORT64) 50 return (n >> 32) ? 51 ((n >> 48) ? 52 ((n >> 56) ? 53 56 + lg_table[(n >> 56) & 0xff] : 54 48 + lg_table[(n >> 48) & 0xff]) : 55 ((n >> 40) ? 56 40 + lg_table[(n >> 40) & 0xff] : 57 32 + lg_table[(n >> 32) & 0xff])) : 58 ((n & 0xffff0000) ? 59 ((n & 0xff000000) ? 60 24 + lg_table[(n >> 24) & 0xff] : 61 16 + lg_table[(n >> 16) & 0xff]) : 62 ((n & 0x0000ff00) ? 63 8 + lg_table[(n >> 8) & 0xff] : 64 0 + lg_table[(n >> 0) & 0xff])); 65# else 66 return (n & 0xffff0000) ? 67 ((n & 0xff000000) ? 68 24 + lg_table[(n >> 24) & 0xff] : 69 16 + lg_table[(n >> 16) & 0xff]) : 70 ((n & 0x0000ff00) ? 71 8 + lg_table[(n >> 8) & 0xff] : 72 0 + lg_table[(n >> 0) & 0xff]); 73# endif 74#elif SS_BLOCKSIZE < 256 75 return lg_table[n]; 76#else 77 return (n & 0xff00) ? 78 8 + lg_table[(n >> 8) & 0xff] : 79 0 + lg_table[(n >> 0) & 0xff]; 80#endif 81} 82 83#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ 84 85#if SS_BLOCKSIZE != 0 86 87static const saint_t sqq_table[256] = { 88 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, 89 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, 90 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, 91110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 92128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 93143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, 94156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, 95169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, 96181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, 97192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, 98202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, 99212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, 100221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, 101230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, 102239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 103247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 104}; 105 106static INLINE 107saidx_t 108ss_isqrt(saidx_t x) { 109 saidx_t y, e; 110 111 if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } 112 e = (x & 0xffff0000) ? 113 ((x & 0xff000000) ? 114 24 + lg_table[(x >> 24) & 0xff] : 115 16 + lg_table[(x >> 16) & 0xff]) : 116 ((x & 0x0000ff00) ? 117 8 + lg_table[(x >> 8) & 0xff] : 118 0 + lg_table[(x >> 0) & 0xff]); 119 120 if(e >= 16) { 121 y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); 122 if(e >= 24) { y = (y + 1 + x / y) >> 1; } 123 y = (y + 1 + x / y) >> 1; 124 } else if(e >= 8) { 125 y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; 126 } else { 127 return sqq_table[x] >> 4; 128 } 129 130 return (x < (y * y)) ? y - 1 : y; 131} 132 133#endif /* SS_BLOCKSIZE != 0 */ 134 135 136/*---------------------------------------------------------------------------*/ 137 138/* Compares two suffixes. */ 139static INLINE 140saint_t 141ss_compare(const sauchar_t *T, 142 const saidx_t *p1, const saidx_t *p2, 143 saidx_t depth) { 144 const sauchar_t *U1, *U2, *U1n, *U2n; 145 146 for(U1 = T + depth + *p1, 147 U2 = T + depth + *p2, 148 U1n = T + *(p1 + 1) + 2, 149 U2n = T + *(p2 + 1) + 2; 150 (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); 151 ++U1, ++U2) { 152 } 153 154 return U1 < U1n ? 155 (U2 < U2n ? *U1 - *U2 : 1) : 156 (U2 < U2n ? -1 : 0); 157} 158 159 160/*---------------------------------------------------------------------------*/ 161 162#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) 163 164/* Insertionsort for small size groups */ 165static 166void 167ss_insertionsort(const sauchar_t *T, const saidx_t *PA, 168 saidx_t *first, saidx_t *last, saidx_t depth) { 169 saidx_t *i, *j; 170 saidx_t t; 171 saint_t r; 172 173 for(i = last - 2; first <= i; --i) { 174 for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) { 175 do { *(j - 1) = *j; } while((++j < last) && (*j < 0)); 176 if(last <= j) { break; } 177 } 178 if(r == 0) { *j = ~*j; } 179 *(j - 1) = t; 180 } 181} 182 183#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */ 184 185 186/*---------------------------------------------------------------------------*/ 187 188#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) 189 190static INLINE 191void 192ss_fixdown(const sauchar_t *Td, const saidx_t *PA, 193 saidx_t *SA, saidx_t i, saidx_t size) { 194 saidx_t j, k; 195 saidx_t v; 196 saint_t c, d, e; 197 198 for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { 199 d = Td[PA[SA[k = j++]]]; 200 if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } 201 if(d <= c) { break; } 202 } 203 SA[i] = v; 204} 205 206/* Simple top-down heapsort. */ 207static 208void 209ss_heapsort(const sauchar_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t size) { 210 saidx_t i, m; 211 saidx_t t; 212 213 m = size; 214 if((size % 2) == 0) { 215 m--; 216 if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } 217 } 218 219 for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } 220 if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } 221 for(i = m - 1; 0 < i; --i) { 222 t = SA[0], SA[0] = SA[i]; 223 ss_fixdown(Td, PA, SA, 0, i); 224 SA[i] = t; 225 } 226} 227 228 229/*---------------------------------------------------------------------------*/ 230 231/* Returns the median of three elements. */ 232static INLINE 233saidx_t * 234ss_median3(const sauchar_t *Td, const saidx_t *PA, 235 saidx_t *v1, saidx_t *v2, saidx_t *v3) { 236 saidx_t *t; 237 if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } 238 if(Td[PA[*v2]] > Td[PA[*v3]]) { 239 if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } 240 else { return v3; } 241 } 242 return v2; 243} 244 245/* Returns the median of five elements. */ 246static INLINE 247saidx_t * 248ss_median5(const sauchar_t *Td, const saidx_t *PA, 249 saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) { 250 saidx_t *t; 251 if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } 252 if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } 253 if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } 254 if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } 255 if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } 256 if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } 257 return v3; 258} 259 260/* Returns the pivot element. */ 261static INLINE 262saidx_t * 263ss_pivot(const sauchar_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last) { 264 saidx_t *middle; 265 saidx_t t; 266 267 t = last - first; 268 middle = first + t / 2; 269 270 if(t <= 512) { 271 if(t <= 32) { 272 return ss_median3(Td, PA, first, middle, last - 1); 273 } else { 274 t >>= 2; 275 return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); 276 } 277 } 278 t >>= 3; 279 first = ss_median3(Td, PA, first, first + t, first + (t << 1)); 280 middle = ss_median3(Td, PA, middle - t, middle, middle + t); 281 last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); 282 return ss_median3(Td, PA, first, middle, last); 283} 284 285 286/*---------------------------------------------------------------------------*/ 287 288/* Binary partition for substrings. */ 289static INLINE 290saidx_t * 291ss_partition(const saidx_t *PA, 292 saidx_t *first, saidx_t *last, saidx_t depth) { 293 saidx_t *a, *b; 294 saidx_t t; 295 for(a = first - 1, b = last;;) { 296 for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } 297 for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } 298 if(b <= a) { break; } 299 t = ~*b; 300 *b = *a; 301 *a = t; 302 } 303 if(first < a) { *first = ~*first; } 304 return a; 305} 306 307/* Multikey introsort for medium size groups. */ 308static 309void 310ss_mintrosort(const sauchar_t *T, const saidx_t *PA, 311 saidx_t *first, saidx_t *last, 312 saidx_t depth) { 313#define STACK_SIZE SS_MISORT_STACKSIZE 314 struct { saidx_t *a, *b, c; saint_t d; } stack[STACK_SIZE]; 315 const sauchar_t *Td; 316 saidx_t *a, *b, *c, *d, *e, *f; 317 saidx_t s, t; 318 saint_t ssize; 319 saint_t limit; 320 saint_t v, x = 0; 321 322 for(ssize = 0, limit = ss_ilg(last - first);;) { 323 324 if((last - first) <= SS_INSERTIONSORT_THRESHOLD) { 325#if 1 < SS_INSERTIONSORT_THRESHOLD 326 if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } 327#endif 328 STACK_POP(first, last, depth, limit); 329 continue; 330 } 331 332 Td = T + depth; 333 if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } 334 if(limit < 0) { 335 for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) { 336 if((x = Td[PA[*a]]) != v) { 337 if(1 < (a - first)) { break; } 338 v = x; 339 first = a; 340 } 341 } 342 if(Td[PA[*first] - 1] < v) { 343 first = ss_partition(PA, first, a, depth); 344 } 345 if((a - first) <= (last - a)) { 346 if(1 < (a - first)) { 347 STACK_PUSH(a, last, depth, -1); 348 last = a, depth += 1, limit = ss_ilg(a - first); 349 } else { 350 first = a, limit = -1; 351 } 352 } else { 353 if(1 < (last - a)) { 354 STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); 355 first = a, limit = -1; 356 } else { 357 last = a, depth += 1, limit = ss_ilg(a - first); 358 } 359 } 360 continue; 361 } 362 363 /* choose pivot */ 364 a = ss_pivot(Td, PA, first, last); 365 v = Td[PA[*a]]; 366 SWAP(*first, *a); 367 368 /* partition */ 369 for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } 370 if(((a = b) < last) && (x < v)) { 371 for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) { 372 if(x == v) { SWAP(*b, *a); ++a; } 373 } 374 } 375 for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } 376 if((b < (d = c)) && (x > v)) { 377 for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { 378 if(x == v) { SWAP(*c, *d); --d; } 379 } 380 } 381 for(; b < c;) { 382 SWAP(*b, *c); 383 for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) { 384 if(x == v) { SWAP(*b, *a); ++a; } 385 } 386 for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { 387 if(x == v) { SWAP(*c, *d); --d; } 388 } 389 } 390 391 if(a <= d) { 392 c = b - 1; 393 394 if((s = a - first) > (t = b - a)) { s = t; } 395 for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } 396 if((s = d - c) > (t = last - d - 1)) { s = t; } 397 for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } 398 399 a = first + (b - a), c = last - (d - c); 400 b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth); 401 402 if((a - first) <= (last - c)) { 403 if((last - c) <= (c - b)) { 404 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); 405 STACK_PUSH(c, last, depth, limit); 406 last = a; 407 } else if((a - first) <= (c - b)) { 408 STACK_PUSH(c, last, depth, limit); 409 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); 410 last = a; 411 } else { 412 STACK_PUSH(c, last, depth, limit); 413 STACK_PUSH(first, a, depth, limit); 414 first = b, last = c, depth += 1, limit = ss_ilg(c - b); 415 } 416 } else { 417 if((a - first) <= (c - b)) { 418 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); 419 STACK_PUSH(first, a, depth, limit); 420 first = c; 421 } else if((last - c) <= (c - b)) { 422 STACK_PUSH(first, a, depth, limit); 423 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); 424 first = c; 425 } else { 426 STACK_PUSH(first, a, depth, limit); 427 STACK_PUSH(c, last, depth, limit); 428 first = b, last = c, depth += 1, limit = ss_ilg(c - b); 429 } 430 } 431 } else { 432 limit += 1; 433 if(Td[PA[*first] - 1] < v) { 434 first = ss_partition(PA, first, last, depth); 435 limit = ss_ilg(last - first); 436 } 437 depth += 1; 438 } 439 } 440#undef STACK_SIZE 441} 442 443#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ 444 445 446/*---------------------------------------------------------------------------*/ 447 448#if SS_BLOCKSIZE != 0 449 450static INLINE 451void 452ss_blockswap(saidx_t *a, saidx_t *b, saidx_t n) { 453 saidx_t t; 454 for(; 0 < n; --n, ++a, ++b) { 455 t = *a, *a = *b, *b = t; 456 } 457} 458 459static INLINE 460void 461ss_rotate(saidx_t *first, saidx_t *middle, saidx_t *last) { 462 saidx_t *a, *b, t; 463 saidx_t l, r; 464 l = middle - first, r = last - middle; 465 for(; (0 < l) && (0 < r);) { 466 if(l == r) { ss_blockswap(first, middle, l); break; } 467 if(l < r) { 468 a = last - 1, b = middle - 1; 469 t = *a; 470 do { 471 *a-- = *b, *b-- = *a; 472 if(b < first) { 473 *a = t; 474 last = a; 475 if((r -= l + 1) <= l) { break; } 476 a -= 1, b = middle - 1; 477 t = *a; 478 } 479 } while(1); 480 } else { 481 a = first, b = middle; 482 t = *a; 483 do { 484 *a++ = *b, *b++ = *a; 485 if(last <= b) { 486 *a = t; 487 first = a + 1; 488 if((l -= r + 1) <= r) { break; } 489 a += 1, b = middle; 490 t = *a; 491 } 492 } while(1); 493 } 494 } 495} 496 497 498/*---------------------------------------------------------------------------*/ 499 500static 501void 502ss_inplacemerge(const sauchar_t *T, const saidx_t *PA, 503 saidx_t *first, saidx_t *middle, saidx_t *last, 504 saidx_t depth) { 505 const saidx_t *p; 506 saidx_t *a, *b; 507 saidx_t len, half; 508 saint_t q, r; 509 saint_t x; 510 511 for(;;) { 512 if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); } 513 else { x = 0; p = PA + *(last - 1); } 514 for(a = first, len = middle - first, half = len >> 1, r = -1; 515 0 < len; 516 len = half, half >>= 1) { 517 b = a + half; 518 q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth); 519 if(q < 0) { 520 a = b + 1; 521 half -= (len & 1) ^ 1; 522 } else { 523 r = q; 524 } 525 } 526 if(a < middle) { 527 if(r == 0) { *a = ~*a; } 528 ss_rotate(a, middle, last); 529 last -= middle - a; 530 middle = a; 531 if(first == middle) { break; } 532 } 533 --last; 534 if(x != 0) { while(*--last < 0) { } } 535 if(middle == last) { break; } 536 } 537} 538 539 540/*---------------------------------------------------------------------------*/ 541 542/* Merge-forward with internal buffer. */ 543static 544void 545ss_mergeforward(const sauchar_t *T, const saidx_t *PA, 546 saidx_t *first, saidx_t *middle, saidx_t *last, 547 saidx_t *buf, saidx_t depth) { 548 saidx_t *a, *b, *c, *bufend; 549 saidx_t t; 550 saint_t r; 551 552 bufend = buf + (middle - first) - 1; 553 ss_blockswap(buf, first, middle - first); 554 555 for(t = *(a = first), b = buf, c = middle;;) { 556 r = ss_compare(T, PA + *b, PA + *c, depth); 557 if(r < 0) { 558 do { 559 *a++ = *b; 560 if(bufend <= b) { *bufend = t; return; } 561 *b++ = *a; 562 } while(*b < 0); 563 } else if(r > 0) { 564 do { 565 *a++ = *c, *c++ = *a; 566 if(last <= c) { 567 while(b < bufend) { *a++ = *b, *b++ = *a; } 568 *a = *b, *b = t; 569 return; 570 } 571 } while(*c < 0); 572 } else { 573 *c = ~*c; 574 do { 575 *a++ = *b; 576 if(bufend <= b) { *bufend = t; return; } 577 *b++ = *a; 578 } while(*b < 0); 579 580 do { 581 *a++ = *c, *c++ = *a; 582 if(last <= c) { 583 while(b < bufend) { *a++ = *b, *b++ = *a; } 584 *a = *b, *b = t; 585 return; 586 } 587 } while(*c < 0); 588 } 589 } 590} 591 592/* Merge-backward with internal buffer. */ 593static 594void 595ss_mergebackward(const sauchar_t *T, const saidx_t *PA, 596 saidx_t *first, saidx_t *middle, saidx_t *last, 597 saidx_t *buf, saidx_t depth) { 598 const saidx_t *p1, *p2; 599 saidx_t *a, *b, *c, *bufend; 600 saidx_t t; 601 saint_t r; 602 saint_t x; 603 604 bufend = buf + (last - middle) - 1; 605 ss_blockswap(buf, middle, last - middle); 606 607 x = 0; 608 if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; } 609 else { p1 = PA + *bufend; } 610 if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; } 611 else { p2 = PA + *(middle - 1); } 612 for(t = *(a = last - 1), b = bufend, c = middle - 1;;) { 613 r = ss_compare(T, p1, p2, depth); 614 if(0 < r) { 615 if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } 616 *a-- = *b; 617 if(b <= buf) { *buf = t; break; } 618 *b-- = *a; 619 if(*b < 0) { p1 = PA + ~*b; x |= 1; } 620 else { p1 = PA + *b; } 621 } else if(r < 0) { 622 if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } 623 *a-- = *c, *c-- = *a; 624 if(c < first) { 625 while(buf < b) { *a-- = *b, *b-- = *a; } 626 *a = *b, *b = t; 627 break; 628 } 629 if(*c < 0) { p2 = PA + ~*c; x |= 2; } 630 else { p2 = PA + *c; } 631 } else { 632 if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } 633 *a-- = ~*b; 634 if(b <= buf) { *buf = t; break; } 635 *b-- = *a; 636 if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } 637 *a-- = *c, *c-- = *a; 638 if(c < first) { 639 while(buf < b) { *a-- = *b, *b-- = *a; } 640 *a = *b, *b = t; 641 break; 642 } 643 if(*b < 0) { p1 = PA + ~*b; x |= 1; } 644 else { p1 = PA + *b; } 645 if(*c < 0) { p2 = PA + ~*c; x |= 2; } 646 else { p2 = PA + *c; } 647 } 648 } 649} 650 651/* D&C based merge. */ 652static 653void 654ss_swapmerge(const sauchar_t *T, const saidx_t *PA, 655 saidx_t *first, saidx_t *middle, saidx_t *last, 656 saidx_t *buf, saidx_t bufsize, saidx_t depth) { 657#define STACK_SIZE SS_SMERGE_STACKSIZE 658#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a))) 659#define MERGE_CHECK(a, b, c)\ 660 do {\ 661 if(((c) & 1) ||\ 662 (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\ 663 *(a) = ~*(a);\ 664 }\ 665 if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\ 666 *(b) = ~*(b);\ 667 }\ 668 } while(0) 669 struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE]; 670 saidx_t *l, *r, *lm, *rm; 671 saidx_t m, len, half; 672 saint_t ssize; 673 saint_t check, next; 674 675 for(check = 0, ssize = 0;;) { 676 if((last - middle) <= bufsize) { 677 if((first < middle) && (middle < last)) { 678 ss_mergebackward(T, PA, first, middle, last, buf, depth); 679 } 680 MERGE_CHECK(first, last, check); 681 STACK_POP(first, middle, last, check); 682 continue; 683 } 684 685 if((middle - first) <= bufsize) { 686 if(first < middle) { 687 ss_mergeforward(T, PA, first, middle, last, buf, depth); 688 } 689 MERGE_CHECK(first, last, check); 690 STACK_POP(first, middle, last, check); 691 continue; 692 } 693 694 for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1; 695 0 < len; 696 len = half, half >>= 1) { 697 if(ss_compare(T, PA + GETIDX(*(middle + m + half)), 698 PA + GETIDX(*(middle - m - half - 1)), depth) < 0) { 699 m += half + 1; 700 half -= (len & 1) ^ 1; 701 } 702 } 703 704 if(0 < m) { 705 lm = middle - m, rm = middle + m; 706 ss_blockswap(lm, middle, m); 707 l = r = middle, next = 0; 708 if(rm < last) { 709 if(*rm < 0) { 710 *rm = ~*rm; 711 if(first < lm) { for(; *--l < 0;) { } next |= 4; } 712 next |= 1; 713 } else if(first < lm) { 714 for(; *r < 0; ++r) { } 715 next |= 2; 716 } 717 } 718 719 if((l - first) <= (last - r)) { 720 STACK_PUSH(r, rm, last, (next & 3) | (check & 4)); 721 middle = lm, last = l, check = (check & 3) | (next & 4); 722 } else { 723 if((next & 2) && (r == middle)) { next ^= 6; } 724 STACK_PUSH(first, lm, l, (check & 3) | (next & 4)); 725 first = r, middle = rm, check = (next & 3) | (check & 4); 726 } 727 } else { 728 if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) { 729 *middle = ~*middle; 730 } 731 MERGE_CHECK(first, last, check); 732 STACK_POP(first, middle, last, check); 733 } 734 } 735#undef STACK_SIZE 736} 737 738#endif /* SS_BLOCKSIZE != 0 */ 739 740 741/*---------------------------------------------------------------------------*/ 742 743/*- Function -*/ 744 745/* Substring sort */ 746void 747sssort(const sauchar_t *T, const saidx_t *PA, 748 saidx_t *first, saidx_t *last, 749 saidx_t *buf, saidx_t bufsize, 750 saidx_t depth, saidx_t n, saint_t lastsuffix) { 751 saidx_t *a; 752#if SS_BLOCKSIZE != 0 753 saidx_t *b, *middle, *curbuf; 754 saidx_t j, k, curbufsize, limit; 755#endif 756 saidx_t i; 757 758 if(lastsuffix != 0) { ++first; } 759 760#if SS_BLOCKSIZE == 0 761 ss_mintrosort(T, PA, first, last, depth); 762#else 763 if((bufsize < SS_BLOCKSIZE) && 764 (bufsize < (last - first)) && 765 (bufsize < (limit = ss_isqrt(last - first)))) { 766 if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } 767 buf = middle = last - limit, bufsize = limit; 768 } else { 769 middle = last, limit = 0; 770 } 771 for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) { 772#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE 773 ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); 774#elif 1 < SS_BLOCKSIZE 775 ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth); 776#endif 777 curbufsize = last - (a + SS_BLOCKSIZE); 778 curbuf = a + SS_BLOCKSIZE; 779 if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; } 780 for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) { 781 ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth); 782 } 783 } 784#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE 785 ss_mintrosort(T, PA, a, middle, depth); 786#elif 1 < SS_BLOCKSIZE 787 ss_insertionsort(T, PA, a, middle, depth); 788#endif 789 for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) { 790 if(i & 1) { 791 ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); 792 a -= k; 793 } 794 } 795 if(limit != 0) { 796#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE 797 ss_mintrosort(T, PA, middle, last, depth); 798#elif 1 < SS_BLOCKSIZE 799 ss_insertionsort(T, PA, middle, last, depth); 800#endif 801 ss_inplacemerge(T, PA, first, middle, last, depth); 802 } 803#endif 804 805 if(lastsuffix != 0) { 806 /* Insert last type B* suffix. */ 807 saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; 808 for(a = first, i = *(first - 1); 809 (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); 810 ++a) { 811 *(a - 1) = *a; 812 } 813 *(a - 1) = i; 814 } 815} 816