1//Templated spread_sort library
2
3//          Copyright Steven J. Ross 2001 - 2009.
4// Distributed under the Boost Software License, Version 1.0.
5//    (See accompanying file LICENSE_1_0.txt or copy at
6//          http://www.boost.org/LICENSE_1_0.txt)
7
8//  See http://www.boost.org/ for updates, documentation, and revision history.
9
10/*
11Some improvements suggested by:
12Phil Endecott and Frank Gennari
13Cygwin fix provided by:
14Scott McMurray
15*/
16
17#ifndef BOOST_SPREAD_SORT_H
18#define BOOST_SPREAD_SORT_H
19#include <algorithm>
20#include <vector>
21#include "constants.hpp"
22#include <cstring>
23
24namespace boost {
25  namespace detail {
26  	//This only works on unsigned data types
27  	template <typename T>
28  	inline unsigned
29  	rough_log_2_size(const T& input)
30  	{
31  		unsigned result = 0;
32  		//The && is necessary on some compilers to avoid infinite loops; it doesn't significantly impair performance
33  		while((input >> result) && (result < (8*sizeof(T)))) ++result;
34  		return result;
35  	}
36
37  	//Gets the maximum size which we'll call spread_sort on to control worst-case performance
38  	//Maintains both a minimum size to recurse and a check of distribution size versus count
39  	//This is called for a set of bins, instead of bin-by-bin, to avoid performance overhead
40  	inline size_t
41  	get_max_count(unsigned log_range, size_t count)
42  	{
43  		unsigned divisor = rough_log_2_size(count);
44  		//Making sure the divisor is positive
45  		if(divisor > LOG_MEAN_BIN_SIZE)
46  			divisor -= LOG_MEAN_BIN_SIZE;
47  		else
48  			divisor = 1;
49  		unsigned relative_width = (LOG_CONST * log_range)/((divisor > MAX_SPLITS) ? MAX_SPLITS : divisor);
50  		//Don't try to bitshift more than the size of an element
51  		if((8*sizeof(size_t)) <= relative_width)
52  			relative_width = (8*sizeof(size_t)) - 1;
53  		return (size_t)1 << ((relative_width < (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT)) ?
54  			(LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT) :  relative_width);
55  	}
56
57  	//Find the minimum and maximum using <
58  	template <class RandomAccessIter>
59  	inline void
60  	find_extremes(RandomAccessIter current, RandomAccessIter last, RandomAccessIter & max, RandomAccessIter & min)
61  	{
62  		min = max = current;
63  		//Start from the second item, as max and min are initialized to the first
64  		while(++current < last) {
65  			if(*max < *current)
66  				max = current;
67  			else if(*current < *min)
68  				min = current;
69  		}
70  	}
71
72  	//Uses a user-defined comparison operator to find minimum and maximum
73  	template <class RandomAccessIter, class compare>
74  	inline void
75  	find_extremes(RandomAccessIter current, RandomAccessIter last, RandomAccessIter & max, RandomAccessIter & min, compare comp)
76  	{
77  		min = max = current;
78  		while(++current < last) {
79  			if(comp(*max, *current))
80  				max = current;
81  			else if(comp(*current, *min))
82  				min = current;
83  		}
84  	}
85
86  	//Gets a non-negative right bit shift to operate as a logarithmic divisor
87  	inline int
88  	get_log_divisor(size_t count, unsigned log_range)
89  	{
90  		int log_divisor;
91  		//If we can finish in one iteration without exceeding either (2 to the MAX_SPLITS) or n bins, do so
92  		if((log_divisor = log_range - rough_log_2_size(count)) <= 0 && log_range < MAX_SPLITS)
93  			log_divisor = 0;
94  		else {
95  			//otherwise divide the data into an optimized number of pieces
96  			log_divisor += LOG_MEAN_BIN_SIZE;
97  			if(log_divisor < 0)
98  				log_divisor = 0;
99  			//Cannot exceed MAX_SPLITS or cache misses slow down bin lookups dramatically
100  			if((log_range - log_divisor) > MAX_SPLITS)
101  				log_divisor = log_range - MAX_SPLITS;
102  		}
103  		return log_divisor;
104  	}
105
106  	template <class RandomAccessIter>
107  	inline RandomAccessIter *
108  	size_bins(std::vector<size_t> &bin_sizes, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset, unsigned &cache_end, unsigned bin_count)
109  	{
110  		//Assure space for the size of each bin, followed by initializing sizes
111  		if(bin_count > bin_sizes.size())
112  			bin_sizes.resize(bin_count);
113  		for(size_t u = 0; u < bin_count; u++)
114  			bin_sizes[u] = 0;
115  		//Make sure there is space for the bins
116  		cache_end = cache_offset + bin_count;
117  		if(cache_end > bin_cache.size())
118  			bin_cache.resize(cache_end);
119  		return &(bin_cache[cache_offset]);
120  	}
121
122  	//Implementation for recursive integer sorting
123  	template <class RandomAccessIter, class div_type, class data_type>
124  	inline void
125  	spread_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
126  				  , std::vector<size_t> &bin_sizes)
127  	{
128  		//This step is roughly 10% of runtime, but it helps avoid worst-case behavior and improve behavior with real data
129  		//If you know the maximum and minimum ahead of time, you can pass those values in and skip this step for the first iteration
130  		RandomAccessIter max, min;
131  		find_extremes(first, last, max, min);
132  		//max and min will be the same (the first item) iff all values are equivalent
133  		if(max == min)
134  			return;
135  		RandomAccessIter * target_bin;
136  		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(*max >> 0) - (*min >> 0)));
137  		div_type div_min = *min >> log_divisor;
138  		div_type div_max = *max >> log_divisor;
139  		unsigned bin_count = div_max - div_min + 1;
140  		unsigned cache_end;
141  		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
142
143  		//Calculating the size of each bin; this takes roughly 10% of runtime
144  		for (RandomAccessIter current = first; current != last;)
145  			bin_sizes[(*(current++) >> log_divisor) - div_min]++;
146  		//Assign the bin positions
147  		bins[0] = first;
148  		for(unsigned u = 0; u < bin_count - 1; u++)
149  			bins[u + 1] = bins[u] + bin_sizes[u];
150
151  		//Swap into place
152  		//This dominates runtime, mostly in the swap and bin lookups
153  		RandomAccessIter nextbinstart = first;
154  		for(unsigned u = 0; u < bin_count - 1; ++u) {
155  			RandomAccessIter * local_bin = bins + u;
156  			nextbinstart += bin_sizes[u];
157  			//Iterating over each element in this bin
158  			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
159  				//Swapping elements in current into place until the correct element has been swapped in
160  				for(target_bin = (bins + ((*current >> log_divisor) - div_min));  target_bin != local_bin;
161  					target_bin = bins + ((*current >> log_divisor) - div_min)) {
162  					//3-way swap; this is about 1% faster than a 2-way swap with integers
163  					//The main advantage is less copies are involved per item put in the correct place
164  					data_type tmp;
165  					RandomAccessIter b = (*target_bin)++;
166  					RandomAccessIter * b_bin = bins + ((*b >> log_divisor) - div_min);
167  					if (b_bin != local_bin) {
168  						RandomAccessIter c = (*b_bin)++;
169  						tmp = *c;
170  						*c = *b;
171  					}
172  					else
173  						tmp = *b;
174  					*b = *current;
175  					*current = tmp;
176  				}
177  			}
178  			*local_bin = nextbinstart;
179  		}
180  		bins[bin_count - 1] = last;
181
182  		//If we've bucketsorted, the array is sorted and we should skip recursion
183  		if(!log_divisor)
184  			return;
185
186  		//Recursing; log_divisor is the remaining range
187  		size_t max_count = get_max_count(log_divisor, last - first);
188  		RandomAccessIter lastPos = first;
189  		for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) {
190  			size_t count = bin_cache[u] - lastPos;
191  			//don't sort unless there are at least two items to compare
192  			if(count < 2)
193  				continue;
194  			//using std::sort if its worst-case is better
195  			if(count < max_count)
196  				std::sort(lastPos, bin_cache[u]);
197  			else
198  				spread_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
199  		}
200  	}
201
202  	//Generic bitshift-based 3-way swapping code
203  	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
204  	inline void inner_swap_loop(RandomAccessIter * bins, const RandomAccessIter & nextbinstart, unsigned ii, right_shift &shift
205  		, const unsigned log_divisor, const div_type div_min)
206  	{
207  		RandomAccessIter * local_bin = bins + ii;
208  		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
209  			for(RandomAccessIter * target_bin = (bins + (shift(*current, log_divisor) - div_min));  target_bin != local_bin;
210  				target_bin = bins + (shift(*current, log_divisor) - div_min)) {
211  				data_type tmp;
212  				RandomAccessIter b = (*target_bin)++;
213  				RandomAccessIter * b_bin = bins + (shift(*b, log_divisor) - div_min);
214  				//Three-way swap; if the item to be swapped doesn't belong in the current bin, swap it to where it belongs
215  				if (b_bin != local_bin) {
216  					RandomAccessIter c = (*b_bin)++;
217  					tmp = *c;
218  					*c = *b;
219  				}
220  				//Note: we could increment current once the swap is done in this case, but that seems to impair performance
221  				else
222  					tmp = *b;
223  				*b = *current;
224  				*current = tmp;
225  			}
226  		}
227  		*local_bin = nextbinstart;
228  	}
229
230  	//Standard swapping wrapper for ascending values
231  	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
232  	inline void swap_loop(RandomAccessIter * bins, RandomAccessIter & nextbinstart, unsigned ii, right_shift &shift
233  		, const std::vector<size_t> &bin_sizes, const unsigned log_divisor, const div_type div_min)
234  	{
235  		nextbinstart += bin_sizes[ii];
236  		inner_swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, ii, shift, log_divisor, div_min);
237  	}
238
239  	//Functor implementation for recursive sorting
240  	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
241  	inline void
242  	spread_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
243  					, std::vector<size_t> &bin_sizes, right_shift shift, compare comp)
244  	{
245  		RandomAccessIter max, min;
246  		find_extremes(first, last, max, min, comp);
247  		if(max == min)
248  			return;
249  		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(shift(*max, 0)) - (shift(*min, 0))));
250  		div_type div_min = shift(*min, log_divisor);
251  		div_type div_max = shift(*max, log_divisor);
252  		unsigned bin_count = div_max - div_min + 1;
253  		unsigned cache_end;
254  		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
255
256  		//Calculating the size of each bin
257  		for (RandomAccessIter current = first; current != last;)
258  			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
259  		bins[0] = first;
260  		for(unsigned u = 0; u < bin_count - 1; u++)
261  			bins[u + 1] = bins[u] + bin_sizes[u];
262
263  		//Swap into place
264  		RandomAccessIter nextbinstart = first;
265  		for(unsigned u = 0; u < bin_count - 1; ++u)
266  			swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, u, shift, bin_sizes, log_divisor, div_min);
267  		bins[bin_count - 1] = last;
268
269  		//If we've bucketsorted, the array is sorted and we should skip recursion
270  		if(!log_divisor)
271  			return;
272
273  		//Recursing
274  		size_t max_count = get_max_count(log_divisor, last - first);
275  		RandomAccessIter lastPos = first;
276  		for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) {
277  			size_t count = bin_cache[u] - lastPos;
278  			if(count < 2)
279  				continue;
280  			if(count < max_count)
281  				std::sort(lastPos, bin_cache[u], comp);
282  			else
283  				spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift, compare>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift, comp);
284  		}
285  	}
286
287  	//Functor implementation for recursive sorting with only Shift overridden
288  	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
289  	inline void
290  	spread_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
291  					, std::vector<size_t> &bin_sizes, right_shift shift)
292  	{
293  		RandomAccessIter max, min;
294  		find_extremes(first, last, max, min);
295  		if(max == min)
296  			return;
297  		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(shift(*max, 0)) - (shift(*min, 0))));
298  		div_type div_min = shift(*min, log_divisor);
299  		div_type div_max = shift(*max, log_divisor);
300  		unsigned bin_count = div_max - div_min + 1;
301  		unsigned cache_end;
302  		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
303
304  		//Calculating the size of each bin
305  		for (RandomAccessIter current = first; current != last;)
306  			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
307  		bins[0] = first;
308  		for(unsigned u = 0; u < bin_count - 1; u++)
309  			bins[u + 1] = bins[u] + bin_sizes[u];
310
311  		//Swap into place
312  		RandomAccessIter nextbinstart = first;
313  		for(unsigned ii = 0; ii < bin_count - 1; ++ii)
314  			swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, ii, shift, bin_sizes, log_divisor, div_min);
315  		bins[bin_count - 1] = last;
316
317  		//If we've bucketsorted, the array is sorted and we should skip recursion
318  		if(!log_divisor)
319  			return;
320
321  		//Recursing
322  		size_t max_count = get_max_count(log_divisor, last - first);
323  		RandomAccessIter lastPos = first;
324  		for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) {
325  			size_t count = bin_cache[u] - lastPos;
326  			if(count < 2)
327  				continue;
328  			if(count < max_count)
329  				std::sort(lastPos, bin_cache[u]);
330  			else
331  				spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift);
332  		}
333  	}
334
335  	//Holds the bin vector and makes the initial recursive call
336  	template <class RandomAccessIter, class div_type, class data_type>
337  	inline void
338  	spread_sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type)
339  	{
340  		std::vector<size_t> bin_sizes;
341  		std::vector<RandomAccessIter> bin_cache;
342  		spread_sort_rec<RandomAccessIter, div_type, data_type>(first, last, bin_cache, 0, bin_sizes);
343  	}
344
345  	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
346  	inline void
347  	spread_sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift, compare comp)
348  	{
349  		std::vector<size_t> bin_sizes;
350  		std::vector<RandomAccessIter> bin_cache;
351  		spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift, compare>(first, last, bin_cache, 0, bin_sizes, shift, comp);
352  	}
353
354  	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
355  	inline void
356  	spread_sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift)
357  	{
358  		std::vector<size_t> bin_sizes;
359  		std::vector<RandomAccessIter> bin_cache;
360  		spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(first, last, bin_cache, 0, bin_sizes, shift);
361  	}
362  }
363
364  //Top-level sorting call for integers
365  template <class RandomAccessIter>
366  inline void integer_sort(RandomAccessIter first, RandomAccessIter last)
367  {
368  	//Don't sort if it's too small to optimize
369  	if(last - first < detail::MIN_SORT_SIZE)
370  		std::sort(first, last);
371  	else
372  		detail::spread_sort(first, last, *first >> 0, *first);
373  }
374
375  //integer_sort with functors
376  template <class RandomAccessIter, class right_shift, class compare>
377  inline void integer_sort(RandomAccessIter first, RandomAccessIter last,
378  						right_shift shift, compare comp) {
379  	if(last - first < detail::MIN_SORT_SIZE)
380  		std::sort(first, last, comp);
381  	else
382  		detail::spread_sort(first, last, shift(*first, 0), *first, shift, comp);
383  }
384
385  //integer_sort with right_shift functor
386  template <class RandomAccessIter, class right_shift>
387  inline void integer_sort(RandomAccessIter first, RandomAccessIter last,
388  						right_shift shift) {
389  	if(last - first < detail::MIN_SORT_SIZE)
390  		std::sort(first, last);
391  	else
392  		detail::spread_sort(first, last, shift(*first, 0), *first, shift);
393  }
394
395  //------------------------------------------------------ float_sort source --------------------------------------
396  //Casts a RandomAccessIter to the specified data type
397  template<class cast_type, class RandomAccessIter>
398  inline cast_type
399  cast_float_iter(const RandomAccessIter & floatiter)
400  {
401  	cast_type result;
402  	std::memcpy(&result, &(*floatiter), sizeof(cast_type));
403  	return result;
404  }
405
406  //Casts a data element to the specified datinner_float_a type
407  template<class data_type, class cast_type>
408  inline cast_type
409  mem_cast(const data_type & data)
410  {
411  	cast_type result;
412  	std::memcpy(&result, &data, sizeof(cast_type));
413  	return result;
414  }
415
416  namespace detail {
417  	template <class RandomAccessIter, class div_type, class right_shift>
418  	inline void
419  	find_extremes(RandomAccessIter current, RandomAccessIter last, div_type & max, div_type & min, right_shift shift)
420  	{
421  		min = max = shift(*current, 0);
422  		while(++current < last) {
423  			div_type value = shift(*current, 0);
424  			if(max < value)
425  				max = value;
426  			else if(value < min)
427  				min = value;
428  		}
429  	}
430
431  	//Specialized swap loops for floating-point casting
432  	template <class RandomAccessIter, class div_type, class data_type>
433  	inline void inner_float_swap_loop(RandomAccessIter * bins, const RandomAccessIter & nextbinstart, unsigned ii
434  		, const unsigned log_divisor, const div_type div_min)
435  	{
436  		RandomAccessIter * local_bin = bins + ii;
437  		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
438  			for(RandomAccessIter * target_bin = (bins + ((cast_float_iter<div_type, RandomAccessIter>(current) >> log_divisor) - div_min));  target_bin != local_bin;
439  				target_bin = bins + ((cast_float_iter<div_type, RandomAccessIter>(current) >> log_divisor) - div_min)) {
440  				data_type tmp;
441  				RandomAccessIter b = (*target_bin)++;
442  				RandomAccessIter * b_bin = bins + ((cast_float_iter<div_type, RandomAccessIter>(b) >> log_divisor) - div_min);
443  				//Three-way swap; if the item to be swapped doesn't belong in the current bin, swap it to where it belongs
444  				if (b_bin != local_bin) {
445  					RandomAccessIter c = (*b_bin)++;
446  					tmp = *c;
447  					*c = *b;
448  				}
449  				else
450  					tmp = *b;
451  				*b = *current;
452  				*current = tmp;
453  			}
454  		}
455  		*local_bin = nextbinstart;
456  	}
457
458  	template <class RandomAccessIter, class div_type, class data_type>
459  	inline void float_swap_loop(RandomAccessIter * bins, RandomAccessIter & nextbinstart, unsigned ii
460  		, const std::vector<size_t> &bin_sizes, const unsigned log_divisor, const div_type div_min)
461  	{
462  		nextbinstart += bin_sizes[ii];
463  		inner_float_swap_loop<RandomAccessIter, div_type, data_type>(bins, nextbinstart, ii, log_divisor, div_min);
464  	}
465
466  	template <class RandomAccessIter, class cast_type>
467  	inline void
468  	find_extremes(RandomAccessIter current, RandomAccessIter last, cast_type & max, cast_type & min)
469  	{
470  		min = max = cast_float_iter<cast_type, RandomAccessIter>(current);
471  		while(++current < last) {
472  			cast_type value = cast_float_iter<cast_type, RandomAccessIter>(current);
473  			if(max < value)
474  				max = value;
475  			else if(value < min)
476  				min = value;
477  		}
478  	}
479
480  	//Special-case sorting of positive floats with casting instead of a right_shift
481  	template <class RandomAccessIter, class div_type, class data_type>
482  	inline void
483  	positive_float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
484  					, std::vector<size_t> &bin_sizes)
485  	{
486  		div_type max, min;
487  		find_extremes(first, last, max, min);
488  		if(max == min)
489  			return;
490  		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
491  		div_type div_min = min >> log_divisor;
492  		div_type div_max = max >> log_divisor;
493  		unsigned bin_count = div_max - div_min + 1;
494  		unsigned cache_end;
495  		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
496
497  		//Calculating the size of each bin
498  		for (RandomAccessIter current = first; current != last;)
499  			bin_sizes[(cast_float_iter<div_type, RandomAccessIter>(current++) >> log_divisor) - div_min]++;
500  		bins[0] = first;
501  		for(unsigned u = 0; u < bin_count - 1; u++)
502  			bins[u + 1] = bins[u] + bin_sizes[u];
503
504  		//Swap into place
505  		RandomAccessIter nextbinstart = first;
506  		for(unsigned u = 0; u < bin_count - 1; ++u)
507  			float_swap_loop<RandomAccessIter, div_type, data_type>(bins, nextbinstart, u, bin_sizes, log_divisor, div_min);
508  		bins[bin_count - 1] = last;
509
510  		//Return if we've completed bucketsorting
511  		if(!log_divisor)
512  			return;
513
514  		//Recursing
515  		size_t max_count = get_max_count(log_divisor, last - first);
516  		RandomAccessIter lastPos = first;
517  		for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) {
518  			size_t count = bin_cache[u] - lastPos;
519  			if(count < 2)
520  				continue;
521  			if(count < max_count)
522  				std::sort(lastPos, bin_cache[u]);
523  			else
524  				positive_float_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
525  		}
526  	}
527
528  	//Sorting negative_ float_s
529  	//Note that bins are iterated in reverse order because max_neg_float = min_neg_int
530  	template <class RandomAccessIter, class div_type, class data_type>
531  	inline void
532  	negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
533  					, std::vector<size_t> &bin_sizes)
534  	{
535  		div_type max, min;
536  		find_extremes(first, last, max, min);
537  		if(max == min)
538  			return;
539  		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
540  		div_type div_min = min >> log_divisor;
541  		div_type div_max = max >> log_divisor;
542  		unsigned bin_count = div_max - div_min + 1;
543  		unsigned cache_end;
544  		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
545
546  		//Calculating the size of each bin
547  		for (RandomAccessIter current = first; current != last;)
548  			bin_sizes[(cast_float_iter<div_type, RandomAccessIter>(current++) >> log_divisor) - div_min]++;
549  		bins[bin_count - 1] = first;
550  		for(int ii = bin_count - 2; ii >= 0; --ii)
551  			bins[ii] = bins[ii + 1] + bin_sizes[ii + 1];
552
553  		//Swap into place
554  		RandomAccessIter nextbinstart = first;
555  		//The last bin will always have the correct elements in it
556  		for(int ii = bin_count - 1; ii > 0; --ii)
557  			float_swap_loop<RandomAccessIter, div_type, data_type>(bins, nextbinstart, ii, bin_sizes, log_divisor, div_min);
558  		//Since we don't process the last bin, we need to update its end position
559  		bin_cache[cache_offset] = last;
560
561  		//Return if we've completed bucketsorting
562  		if(!log_divisor)
563  			return;
564
565  		//Recursing
566  		size_t max_count = get_max_count(log_divisor, last - first);
567  		RandomAccessIter lastPos = first;
568  		for(int ii = cache_end - 1; ii >= (int)cache_offset; lastPos = bin_cache[ii], --ii) {
569  			size_t count = bin_cache[ii] - lastPos;
570  			if(count < 2)
571  				continue;
572  			if(count < max_count)
573  				std::sort(lastPos, bin_cache[ii]);
574  			else
575  				negative_float_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes);
576  		}
577  	}
578
579  	//Sorting negative_ float_s
580  	//Note that bins are iterated in reverse order because max_neg_float = min_neg_int
581  	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
582  	inline void
583  	negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
584  					, std::vector<size_t> &bin_sizes, right_shift shift)
585  	{
586  		div_type max, min;
587  		find_extremes(first, last, max, min, shift);
588  		if(max == min)
589  			return;
590  		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
591  		div_type div_min = min >> log_divisor;
592  		div_type div_max = max >> log_divisor;
593  		unsigned bin_count = div_max - div_min + 1;
594  		unsigned cache_end;
595  		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
596
597  		//Calculating the size of each bin
598  		for (RandomAccessIter current = first; current != last;)
599  			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
600  		bins[bin_count - 1] = first;
601  		for(int ii = bin_count - 2; ii >= 0; --ii)
602  			bins[ii] = bins[ii + 1] + bin_sizes[ii + 1];
603
604  		//Swap into place
605  		RandomAccessIter nextbinstart = first;
606  		//The last bin will always have the correct elements in it
607  		for(int ii = bin_count - 1; ii > 0; --ii)
608  			swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, ii, shift, bin_sizes, log_divisor, div_min);
609  		//Since we don't process the last bin, we need to update its end position
610  		bin_cache[cache_offset] = last;
611
612  		//Return if we've completed bucketsorting
613  		if(!log_divisor)
614  			return;
615
616  		//Recursing
617  		size_t max_count = get_max_count(log_divisor, last - first);
618  		RandomAccessIter lastPos = first;
619  		for(int ii = cache_end - 1; ii >= (int)cache_offset; lastPos = bin_cache[ii], --ii) {
620  			size_t count = bin_cache[ii] - lastPos;
621  			if(count < 2)
622  				continue;
623  			if(count < max_count)
624  				std::sort(lastPos, bin_cache[ii]);
625  			else
626  				negative_float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift);
627  		}
628  	}
629
630  	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
631  	inline void
632  	negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
633  					, std::vector<size_t> &bin_sizes, right_shift shift, compare comp)
634  	{
635  		div_type max, min;
636  		find_extremes(first, last, max, min, shift);
637  		if(max == min)
638  			return;
639  		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
640  		div_type div_min = min >> log_divisor;
641  		div_type div_max = max >> log_divisor;
642  		unsigned bin_count = div_max - div_min + 1;
643  		unsigned cache_end;
644  		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
645
646  		//Calculating the size of each bin
647  		for (RandomAccessIter current = first; current != last;)
648  			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
649  		bins[bin_count - 1] = first;
650  		for(int ii = bin_count - 2; ii >= 0; --ii)
651  			bins[ii] = bins[ii + 1] + bin_sizes[ii + 1];
652
653  		//Swap into place
654  		RandomAccessIter nextbinstart = first;
655  		//The last bin will always have the correct elements in it
656  		for(int ii = bin_count - 1; ii > 0; --ii)
657  			swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, ii, shift, bin_sizes, log_divisor, div_min);
658  		//Since we don't process the last bin, we need to update its end position
659  		bin_cache[cache_offset] = last;
660
661  		//Return if we've completed bucketsorting
662  		if(!log_divisor)
663  			return;
664
665  		//Recursing
666  		size_t max_count = get_max_count(log_divisor, last - first);
667  		RandomAccessIter lastPos = first;
668  		for(int ii = cache_end - 1; ii >= (int)cache_offset; lastPos = bin_cache[ii], --ii) {
669  			size_t count = bin_cache[ii] - lastPos;
670  			if(count < 2)
671  				continue;
672  			if(count < max_count)
673  				std::sort(lastPos, bin_cache[ii], comp);
674  			else
675  				negative_float_sort_rec<RandomAccessIter, div_type, data_type, right_shift, compare>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift, comp);
676  		}
677  	}
678
679  	//Casting special-case for floating-point sorting
680  	template <class RandomAccessIter, class div_type, class data_type>
681  	inline void
682  	float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
683  					, std::vector<size_t> &bin_sizes)
684  	{
685  		div_type max, min;
686  		find_extremes(first, last, max, min);
687  		if(max == min)
688  			return;
689  		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
690  		div_type div_min = min >> log_divisor;
691  		div_type div_max = max >> log_divisor;
692  		unsigned bin_count = div_max - div_min + 1;
693  		unsigned cache_end;
694  		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
695
696  		//Calculating the size of each bin
697  		for (RandomAccessIter current = first; current != last;)
698  			bin_sizes[(cast_float_iter<div_type, RandomAccessIter>(current++) >> log_divisor) - div_min]++;
699  		//The index of the first positive bin
700  		div_type first_positive = (div_min < 0) ? -div_min : 0;
701  		//Resetting if all bins are negative
702  		if(cache_offset + first_positive > cache_end)
703  			first_positive = cache_end - cache_offset;
704  		//Reversing the order of the negative bins
705  		//Note that because of the negative/positive ordering direction flip
706  		//We can not depend upon bin order and positions matching up
707  		//so bin_sizes must be reused to contain the end of the bin
708  		if(first_positive > 0) {
709  			bins[first_positive - 1] = first;
710  			for(int ii = first_positive - 2; ii >= 0; --ii) {
711  				bins[ii] = first + bin_sizes[ii + 1];
712  				bin_sizes[ii] += bin_sizes[ii + 1];
713  			}
714  			//Handling positives following negatives
715  			if((unsigned)first_positive < bin_count) {
716  				bins[first_positive] = first + bin_sizes[0];
717  				bin_sizes[first_positive] += bin_sizes[0];
718  			}
719  		}
720  		else
721  			bins[0] = first;
722  		for(unsigned u = first_positive; u < bin_count - 1; u++) {
723  			bins[u + 1] = first + bin_sizes[u];
724  			bin_sizes[u + 1] += bin_sizes[u];
725  		}
726
727  		//Swap into place
728  		RandomAccessIter nextbinstart = first;
729  		for(unsigned u = 0; u < bin_count; ++u) {
730  			nextbinstart = first + bin_sizes[u];
731  			inner_float_swap_loop<RandomAccessIter, div_type, data_type>(bins, nextbinstart, u, log_divisor, div_min);
732  		}
733
734  		if(!log_divisor)
735  			return;
736
737  		//Handling negative values first
738  		size_t max_count = get_max_count(log_divisor, last - first);
739  		RandomAccessIter lastPos = first;
740  		for(int ii = cache_offset + first_positive - 1; ii >= (int)cache_offset ; lastPos = bin_cache[ii--]) {
741  			size_t count = bin_cache[ii] - lastPos;
742  			if(count < 2)
743  				continue;
744  			if(count < max_count)
745  				std::sort(lastPos, bin_cache[ii]);
746  			//sort negative values using reversed-bin spread_sort
747  			else
748  				negative_float_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes);
749  		}
750
751  		for(unsigned u = cache_offset + first_positive; u < cache_end; lastPos = bin_cache[u], ++u) {
752  			size_t count = bin_cache[u] - lastPos;
753  			if(count < 2)
754  				continue;
755  			if(count < max_count)
756  				std::sort(lastPos, bin_cache[u]);
757  			//sort positive values using normal spread_sort
758  			else
759  				positive_float_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
760  		}
761  	}
762
763  	//Functor implementation for recursive sorting
764  	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
765  	inline void
766  	float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
767  					, std::vector<size_t> &bin_sizes, right_shift shift)
768  	{
769  		div_type max, min;
770  		find_extremes(first, last, max, min, shift);
771  		if(max == min)
772  			return;
773  		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
774  		div_type div_min = min >> log_divisor;
775  		div_type div_max = max >> log_divisor;
776  		unsigned bin_count = div_max - div_min + 1;
777  		unsigned cache_end;
778  		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
779
780  		//Calculating the size of each bin
781  		for (RandomAccessIter current = first; current != last;)
782  			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
783  		//The index of the first positive bin
784  		div_type first_positive = (div_min < 0) ? -div_min : 0;
785  		//Resetting if all bins are negative
786  		if(cache_offset + first_positive > cache_end)
787  			first_positive = cache_end - cache_offset;
788  		//Reversing the order of the negative bins
789  		//Note that because of the negative/positive ordering direction flip
790  		//We can not depend upon bin order and positions matching up
791  		//so bin_sizes must be reused to contain the end of the bin
792  		if(first_positive > 0) {
793  			bins[first_positive - 1] = first;
794  			for(int ii = first_positive - 2; ii >= 0; --ii) {
795  				bins[ii] = first + bin_sizes[ii + 1];
796  				bin_sizes[ii] += bin_sizes[ii + 1];
797  			}
798  			//Handling positives following negatives
799  			if((unsigned)first_positive < bin_count) {
800  				bins[first_positive] = first + bin_sizes[0];
801  				bin_sizes[first_positive] += bin_sizes[0];
802  			}
803  		}
804  		else
805  			bins[0] = first;
806  		for(unsigned u = first_positive; u < bin_count - 1; u++) {
807  			bins[u + 1] = first + bin_sizes[u];
808  			bin_sizes[u + 1] += bin_sizes[u];
809  		}
810
811  		//Swap into place
812  		RandomAccessIter nextbinstart = first;
813  		for(unsigned u = 0; u < bin_count; ++u) {
814  			nextbinstart = first + bin_sizes[u];
815  			inner_swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, u, shift, log_divisor, div_min);
816  		}
817
818  		//Return if we've completed bucketsorting
819  		if(!log_divisor)
820  			return;
821
822  		//Handling negative values first
823  		size_t max_count = get_max_count(log_divisor, last - first);
824  		RandomAccessIter lastPos = first;
825  		for(int ii = cache_offset + first_positive - 1; ii >= (int)cache_offset ; lastPos = bin_cache[ii--]) {
826  			size_t count = bin_cache[ii] - lastPos;
827  			if(count < 2)
828  				continue;
829  			if(count < max_count)
830  				std::sort(lastPos, bin_cache[ii]);
831  			//sort negative values using reversed-bin spread_sort
832  			else
833  				negative_float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift);
834  		}
835
836  		for(unsigned u = cache_offset + first_positive; u < cache_end; lastPos = bin_cache[u], ++u) {
837  			size_t count = bin_cache[u] - lastPos;
838  			if(count < 2)
839  				continue;
840  			if(count < max_count)
841  				std::sort(lastPos, bin_cache[u]);
842  			//sort positive values using normal spread_sort
843  			else
844  				spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift);
845  		}
846  	}
847
848  	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
849  	inline void
850  	float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
851  					, std::vector<size_t> &bin_sizes, right_shift shift, compare comp)
852  	{
853  		div_type max, min;
854  		find_extremes(first, last, max, min, shift);
855  		if(max == min)
856  			return;
857  		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
858  		div_type div_min = min >> log_divisor;
859  		div_type div_max = max >> log_divisor;
860  		unsigned bin_count = div_max - div_min + 1;
861  		unsigned cache_end;
862  		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
863
864  		//Calculating the size of each bin
865  		for (RandomAccessIter current = first; current != last;)
866  			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
867  		//The index of the first positive bin
868  		div_type first_positive = (div_min < 0) ? -div_min : 0;
869  		//Resetting if all bins are negative
870  		if(cache_offset + first_positive > cache_end)
871  			first_positive = cache_end - cache_offset;
872  		//Reversing the order of the negative bins
873  		//Note that because of the negative/positive ordering direction flip
874  		//We can not depend upon bin order and positions matching up
875  		//so bin_sizes must be reused to contain the end of the bin
876  		if(first_positive > 0) {
877  			bins[first_positive - 1] = first;
878  			for(int ii = first_positive - 2; ii >= 0; --ii) {
879  				bins[ii] = first + bin_sizes[ii + 1];
880  				bin_sizes[ii] += bin_sizes[ii + 1];
881  			}
882  			//Handling positives following negatives
883  			if((unsigned)first_positive < bin_count) {
884  				bins[first_positive] = first + bin_sizes[0];
885  				bin_sizes[first_positive] += bin_sizes[0];
886  			}
887  		}
888  		else
889  			bins[0] = first;
890  		for(unsigned u = first_positive; u < bin_count - 1; u++) {
891  			bins[u + 1] = first + bin_sizes[u];
892  			bin_sizes[u + 1] += bin_sizes[u];
893  		}
894
895  		//Swap into place
896  		RandomAccessIter nextbinstart = first;
897  		for(unsigned u = 0; u < bin_count; ++u) {
898  			nextbinstart = first + bin_sizes[u];
899  			inner_swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, u, shift, log_divisor, div_min);
900  		}
901
902  		//Return if we've completed bucketsorting
903  		if(!log_divisor)
904  			return;
905
906  		//Handling negative values first
907  		size_t max_count = get_max_count(log_divisor, last - first);
908  		RandomAccessIter lastPos = first;
909  		for(int ii = cache_offset + first_positive - 1; ii >= (int)cache_offset ; lastPos = bin_cache[ii--]) {
910  			size_t count = bin_cache[ii] - lastPos;
911  			if(count < 2)
912  				continue;
913  			if(count < max_count)
914  				std::sort(lastPos, bin_cache[ii]);
915  			//sort negative values using reversed-bin spread_sort
916  			else
917  				negative_float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift, comp);
918  		}
919
920  		for(unsigned u = cache_offset + first_positive; u < cache_end; lastPos = bin_cache[u], ++u) {
921  			size_t count = bin_cache[u] - lastPos;
922  			if(count < 2)
923  				continue;
924  			if(count < max_count)
925  				std::sort(lastPos, bin_cache[u]);
926  			//sort positive values using normal spread_sort
927  			else
928  				spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift, comp);
929  		}
930  	}
931
932  	template <class RandomAccessIter, class cast_type, class data_type>
933  	inline void
934  	float_Sort(RandomAccessIter first, RandomAccessIter last, cast_type, data_type)
935  	{
936  		std::vector<size_t> bin_sizes;
937  		std::vector<RandomAccessIter> bin_cache;
938  		float_sort_rec<RandomAccessIter, cast_type, data_type>(first, last, bin_cache, 0, bin_sizes);
939  	}
940
941  	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
942  	inline void
943  	float_Sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift)
944  	{
945  		std::vector<size_t> bin_sizes;
946  		std::vector<RandomAccessIter> bin_cache;
947  		float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(first, last, bin_cache, 0, bin_sizes, shift);
948  	}
949
950  	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
951  	inline void
952  	float_Sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift, compare comp)
953  	{
954  		std::vector<size_t> bin_sizes;
955  		std::vector<RandomAccessIter> bin_cache;
956  		float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(first, last, bin_cache, 0, bin_sizes, shift, comp);
957  	}
958  }
959
960  //float_sort with casting
961  //The cast_type must be equal in size to the data type, and must be a signed integer
962  template <class RandomAccessIter, class cast_type>
963  inline void float_sort_cast(RandomAccessIter first, RandomAccessIter last, cast_type cVal)
964  {
965  	if(last - first < detail::MIN_SORT_SIZE)
966  		std::sort(first, last);
967  	else
968  		detail::float_Sort(first, last, cVal, *first);
969  }
970
971  //float_sort with casting to an int
972  //Only use this with IEEE floating-point numbers
973  template <class RandomAccessIter>
974  inline void float_sort_cast_to_int(RandomAccessIter first, RandomAccessIter last)
975  {
976  	int cVal = 0;
977  	float_sort_cast(first, last, cVal);
978  }
979
980  //float_sort with functors
981  template <class RandomAccessIter, class right_shift>
982  inline void float_sort(RandomAccessIter first, RandomAccessIter last, right_shift shift)
983  {
984  	if(last - first < detail::MIN_SORT_SIZE)
985  		std::sort(first, last);
986  	else
987  		detail::float_Sort(first, last, shift(*first, 0), *first, shift);
988  }
989
990  template <class RandomAccessIter, class right_shift, class compare>
991  inline void float_sort(RandomAccessIter first, RandomAccessIter last, right_shift shift, compare comp)
992  {
993  	if(last - first < detail::MIN_SORT_SIZE)
994  		std::sort(first, last, comp);
995  	else
996  		detail::float_Sort(first, last, shift(*first, 0), *first, shift, comp);
997  }
998
999  //------------------------------------------------- string_sort source ---------------------------------------------
1000  namespace detail {
1001  	//Offsetting on identical characters.  This function works a character at a time for optimal worst-case performance.
1002  	template<class RandomAccessIter>
1003  	inline void
1004  	update_offset(RandomAccessIter first, RandomAccessIter finish, unsigned &char_offset)
1005  	{
1006  		unsigned nextOffset = char_offset;
1007  		bool done = false;
1008  		while(!done) {
1009  			RandomAccessIter curr = first;
1010  			do {
1011  				//ignore empties, but if the nextOffset would exceed the length or not match, exit; we've found the last matching character
1012  				if((*curr).size() > char_offset && ((*curr).size() <= (nextOffset + 1) || (*curr)[nextOffset] != (*first)[nextOffset])) {
1013  					done = true;
1014  					break;
1015  				}
1016  			} while(++curr != finish);
1017  			if(!done)
1018  				++nextOffset;
1019  		}
1020  		char_offset = nextOffset;
1021  	}
1022
1023  	//Offsetting on identical characters.  This function works a character at a time for optimal worst-case performance.
1024  	template<class RandomAccessIter, class get_char, class get_length>
1025  	inline void
1026  	update_offset(RandomAccessIter first, RandomAccessIter finish, unsigned &char_offset, get_char getchar, get_length length)
1027  	{
1028  		unsigned nextOffset = char_offset;
1029  		bool done = false;
1030  		while(!done) {
1031  			RandomAccessIter curr = first;
1032  			do {
1033  				//ignore empties, but if the nextOffset would exceed the length or not match, exit; we've found the last matching character
1034  				if(length(*curr) > char_offset && (length(*curr) <= (nextOffset + 1) || getchar((*curr), nextOffset) != getchar((*first), nextOffset))) {
1035  					done = true;
1036  					break;
1037  				}
1038  			} while(++curr != finish);
1039  			if(!done)
1040  				++nextOffset;
1041  		}
1042  		char_offset = nextOffset;
1043  	}
1044
1045  	//A comparison functor for strings that assumes they are identical up to char_offset
1046  	template<class data_type, class unsignedchar_type>
1047  	struct offset_lessthan {
1048  		offset_lessthan(unsigned char_offset) : fchar_offset(char_offset){}
1049  		inline bool operator()(const data_type &x, const data_type &y) const
1050  		{
1051  			unsigned minSize = std::min(x.size(), y.size());
1052  			for(unsigned u = fchar_offset; u < minSize; ++u) {
1053  				if(static_cast<unsignedchar_type>(x[u]) < static_cast<unsignedchar_type>(y[u]))
1054  					return true;
1055  				else if(static_cast<unsignedchar_type>(y[u]) < static_cast<unsignedchar_type>(x[u]))
1056  					return false;
1057  			}
1058  			return x.size() < y.size();
1059  		}
1060  		unsigned fchar_offset;
1061  	};
1062
1063  	//A comparison functor for strings that assumes they are identical up to char_offset
1064  	template<class data_type, class unsignedchar_type>
1065  	struct offset_greaterthan {
1066  		offset_greaterthan(unsigned char_offset) : fchar_offset(char_offset){}
1067  		inline bool operator()(const data_type &x, const data_type &y) const
1068  		{
1069  			unsigned minSize = std::min(x.size(), y.size());
1070  			for(unsigned u = fchar_offset; u < minSize; ++u) {
1071  				if(static_cast<unsignedchar_type>(x[u]) > static_cast<unsignedchar_type>(y[u]))
1072  					return true;
1073  				else if(static_cast<unsignedchar_type>(y[u]) > static_cast<unsignedchar_type>(x[u]))
1074  					return false;
1075  			}
1076  			return x.size() > y.size();
1077  		}
1078  		unsigned fchar_offset;
1079  	};
1080
1081  	//A comparison functor for strings that assumes they are identical up to char_offset
1082  	template<class data_type, class get_char, class get_length>
1083  	struct offset_char_lessthan {
1084  		offset_char_lessthan(unsigned char_offset) : fchar_offset(char_offset){}
1085  		inline bool operator()(const data_type &x, const data_type &y) const
1086  		{
1087  			unsigned minSize = std::min(length(x), length(y));
1088  			for(unsigned u = fchar_offset; u < minSize; ++u) {
1089  				if(getchar(x, u) < getchar(y, u))
1090  					return true;
1091  				else if(getchar(y, u) < getchar(x, u))
1092  					return false;
1093  			}
1094  			return length(x) < length(y);
1095  		}
1096  		unsigned fchar_offset;
1097  		get_char getchar;
1098  		get_length length;
1099  	};
1100
1101  	//String sorting recursive implementation
1102  	template <class RandomAccessIter, class data_type, class unsignedchar_type>
1103  	inline void
1104  	string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
1105  		, unsigned cache_offset, std::vector<size_t> &bin_sizes)
1106  	{
1107  		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
1108  		//Iterate to the end of the empties.  If all empty, return
1109  		while((*first).size() <= char_offset) {
1110  			if(++first == last)
1111  				return;
1112  		}
1113  		RandomAccessIter finish = last - 1;
1114  		//Getting the last non-empty
1115  		for(;(*finish).size() <= char_offset; --finish) { }
1116  		++finish;
1117  		//Offsetting on identical characters.  This section works a character at a time for optimal worst-case performance.
1118  		update_offset(first, finish, char_offset);
1119
1120  		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
1121  		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
1122  		const unsigned max_size = bin_count;
1123  		const unsigned membin_count = bin_count + 1;
1124  		unsigned cache_end;
1125  		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count) + 1;
1126
1127  		//Calculating the size of each bin; this takes roughly 10% of runtime
1128  		for (RandomAccessIter current = first; current != last; ++current) {
1129  			if((*current).size() <= char_offset) {
1130  				bin_sizes[0]++;
1131  			}
1132  			else
1133  				bin_sizes[static_cast<unsignedchar_type>((*current)[char_offset]) + 1]++;
1134  		}
1135  		//Assign the bin positions
1136  		bin_cache[cache_offset] = first;
1137  		for(unsigned u = 0; u < membin_count - 1; u++)
1138  			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
1139
1140  		//Swap into place
1141  		RandomAccessIter nextbinstart = first;
1142  		//handling empty bins
1143  		RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
1144  		nextbinstart +=	bin_sizes[0];
1145  		RandomAccessIter * target_bin;
1146  		//Iterating over each element in the bin of empties
1147  		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1148  			//empties belong in this bin
1149  			while((*current).size() > char_offset) {
1150  				target_bin = bins + static_cast<unsignedchar_type>((*current)[char_offset]);
1151  				iter_swap(current, (*target_bin)++);
1152  			}
1153  		}
1154  		*local_bin = nextbinstart;
1155  		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
1156  		unsigned last_bin = bin_count - 1;
1157  		for(; last_bin && !bin_sizes[last_bin + 1]; --last_bin) { }
1158  		//This dominates runtime, mostly in the swap and bin lookups
1159  		for(unsigned u = 0; u < last_bin; ++u) {
1160  			local_bin = bins + u;
1161  			nextbinstart += bin_sizes[u + 1];
1162  			//Iterating over each element in this bin
1163  			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1164  				//Swapping elements in current into place until the correct element has been swapped in
1165  				for(target_bin = bins + static_cast<unsignedchar_type>((*current)[char_offset]);  target_bin != local_bin;
1166  					target_bin = bins + static_cast<unsignedchar_type>((*current)[char_offset]))
1167  					iter_swap(current, (*target_bin)++);
1168  			}
1169  			*local_bin = nextbinstart;
1170  		}
1171  		bins[last_bin] = last;
1172  		//Recursing
1173  		RandomAccessIter lastPos = bin_cache[cache_offset];
1174  		//Skip this loop for empties
1175  		for(unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2; lastPos = bin_cache[u], ++u) {
1176  			size_t count = bin_cache[u] - lastPos;
1177  			//don't sort unless there are at least two items to compare
1178  			if(count < 2)
1179  				continue;
1180  			//using std::sort if its worst-case is better
1181  			if(count < max_size)
1182  				std::sort(lastPos, bin_cache[u], offset_lessthan<data_type, unsignedchar_type>(char_offset + 1));
1183  			else
1184  				string_sort_rec<RandomAccessIter, data_type, unsignedchar_type>(lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
1185  		}
1186  	}
1187
1188  	//Sorts strings in reverse order, with empties at the end
1189  	template <class RandomAccessIter, class data_type, class unsignedchar_type>
1190  	inline void
1191  	reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
1192  		, unsigned cache_offset, std::vector<size_t> &bin_sizes)
1193  	{
1194  		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
1195  		RandomAccessIter curr = first;
1196  		//Iterate to the end of the empties.  If all empty, return
1197  		while((*curr).size() <= char_offset) {
1198  			if(++curr == last)
1199  				return;
1200  		}
1201  		//Getting the last non-empty
1202  		while((*(--last)).size() <= char_offset) { }
1203  		++last;
1204  		//Offsetting on identical characters.  This section works a character at a time for optimal worst-case performance.
1205  		update_offset(curr, last, char_offset);
1206  		RandomAccessIter * target_bin;
1207
1208  		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
1209  		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
1210  		const unsigned max_size = bin_count;
1211  		const unsigned membin_count = bin_count + 1;
1212  		const unsigned max_bin = bin_count - 1;
1213  		unsigned cache_end;
1214  		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count);
1215  		RandomAccessIter * end_bin = &(bin_cache[cache_offset + max_bin]);
1216
1217  		//Calculating the size of each bin; this takes roughly 10% of runtime
1218  		for (RandomAccessIter current = first; current != last; ++current) {
1219  			if((*current).size() <= char_offset) {
1220  				bin_sizes[bin_count]++;
1221  			}
1222  			else
1223  				bin_sizes[max_bin - static_cast<unsignedchar_type>((*current)[char_offset])]++;
1224  		}
1225  		//Assign the bin positions
1226  		bin_cache[cache_offset] = first;
1227  		for(unsigned u = 0; u < membin_count - 1; u++)
1228  			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
1229
1230  		//Swap into place
1231  		RandomAccessIter nextbinstart = last;
1232  		//handling empty bins
1233  		RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
1234  		RandomAccessIter lastFull = *local_bin;
1235  		//Iterating over each element in the bin of empties
1236  		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1237  			//empties belong in this bin
1238  			while((*current).size() > char_offset) {
1239  				target_bin = end_bin - static_cast<unsignedchar_type>((*current)[char_offset]);
1240  				iter_swap(current, (*target_bin)++);
1241  			}
1242  		}
1243  		*local_bin = nextbinstart;
1244  		nextbinstart = first;
1245  		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
1246  		unsigned last_bin = max_bin;
1247  		for(; last_bin && !bin_sizes[last_bin]; --last_bin) { }
1248  		//This dominates runtime, mostly in the swap and bin lookups
1249  		for(unsigned u = 0; u < last_bin; ++u) {
1250  			local_bin = bins + u;
1251  			nextbinstart += bin_sizes[u];
1252  			//Iterating over each element in this bin
1253  			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1254  				//Swapping elements in current into place until the correct element has been swapped in
1255  				for(target_bin = end_bin - static_cast<unsignedchar_type>((*current)[char_offset]);  target_bin != local_bin;
1256  					target_bin = end_bin - static_cast<unsignedchar_type>((*current)[char_offset]))
1257  					iter_swap(current, (*target_bin)++);
1258  			}
1259  			*local_bin = nextbinstart;
1260  		}
1261  		bins[last_bin] = lastFull;
1262  		//Recursing
1263  		RandomAccessIter lastPos = first;
1264  		//Skip this loop for empties
1265  		for(unsigned u = cache_offset; u <= cache_offset + last_bin; lastPos = bin_cache[u], ++u) {
1266  			size_t count = bin_cache[u] - lastPos;
1267  			//don't sort unless there are at least two items to compare
1268  			if(count < 2)
1269  				continue;
1270  			//using std::sort if its worst-case is better
1271  			if(count < max_size)
1272  				std::sort(lastPos, bin_cache[u], offset_greaterthan<data_type, unsignedchar_type>(char_offset + 1));
1273  			else
1274  				reverse_string_sort_rec<RandomAccessIter, data_type, unsignedchar_type>(lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
1275  		}
1276  	}
1277
1278  	//String sorting recursive implementation
1279  	template <class RandomAccessIter, class data_type, class unsignedchar_type, class get_char, class get_length>
1280  	inline void
1281  	string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
1282  		, unsigned cache_offset, std::vector<size_t> &bin_sizes, get_char getchar, get_length length)
1283  	{
1284  		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
1285  		//Iterate to the end of the empties.  If all empty, return
1286  		while(length(*first) <= char_offset) {
1287  			if(++first == last)
1288  				return;
1289  		}
1290  		RandomAccessIter finish = last - 1;
1291  		//Getting the last non-empty
1292  		for(;length(*finish) <= char_offset; --finish) { }
1293  		++finish;
1294  		update_offset(first, finish, char_offset, getchar, length);
1295
1296  		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
1297  		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
1298  		const unsigned max_size = bin_count;
1299  		const unsigned membin_count = bin_count + 1;
1300  		unsigned cache_end;
1301  		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count) + 1;
1302
1303  		//Calculating the size of each bin; this takes roughly 10% of runtime
1304  		for (RandomAccessIter current = first; current != last; ++current) {
1305  			if(length(*current) <= char_offset) {
1306  				bin_sizes[0]++;
1307  			}
1308  			else
1309  				bin_sizes[getchar((*current), char_offset) + 1]++;
1310  		}
1311  		//Assign the bin positions
1312  		bin_cache[cache_offset] = first;
1313  		for(unsigned u = 0; u < membin_count - 1; u++)
1314  			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
1315
1316  		//Swap into place
1317  		RandomAccessIter nextbinstart = first;
1318  		//handling empty bins
1319  		RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
1320  		nextbinstart +=	bin_sizes[0];
1321  		RandomAccessIter * target_bin;
1322  		//Iterating over each element in the bin of empties
1323  		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1324  			//empties belong in this bin
1325  			while(length(*current) > char_offset) {
1326  				target_bin = bins + getchar((*current), char_offset);
1327  				iter_swap(current, (*target_bin)++);
1328  			}
1329  		}
1330  		*local_bin = nextbinstart;
1331  		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
1332  		unsigned last_bin = bin_count - 1;
1333  		for(; last_bin && !bin_sizes[last_bin + 1]; --last_bin) { }
1334  		//This dominates runtime, mostly in the swap and bin lookups
1335  		for(unsigned ii = 0; ii < last_bin; ++ii) {
1336  			local_bin = bins + ii;
1337  			nextbinstart += bin_sizes[ii + 1];
1338  			//Iterating over each element in this bin
1339  			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1340  				//Swapping elements in current into place until the correct element has been swapped in
1341  				for(target_bin = bins + getchar((*current), char_offset);  target_bin != local_bin;
1342  					target_bin = bins + getchar((*current), char_offset))
1343  					iter_swap(current, (*target_bin)++);
1344  			}
1345  			*local_bin = nextbinstart;
1346  		}
1347  		bins[last_bin] = last;
1348
1349  		//Recursing
1350  		RandomAccessIter lastPos = bin_cache[cache_offset];
1351  		//Skip this loop for empties
1352  		for(unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2; lastPos = bin_cache[u], ++u) {
1353  			size_t count = bin_cache[u] - lastPos;
1354  			//don't sort unless there are at least two items to compare
1355  			if(count < 2)
1356  				continue;
1357  			//using std::sort if its worst-case is better
1358  			if(count < max_size)
1359  				std::sort(lastPos, bin_cache[u], offset_char_lessthan<data_type, get_char, get_length>(char_offset + 1));
1360  			else
1361  				string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length>(lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes, getchar, length);
1362  		}
1363  	}
1364
1365  	//String sorting recursive implementation
1366  	template <class RandomAccessIter, class data_type, class unsignedchar_type, class get_char, class get_length, class compare>
1367  	inline void
1368  	string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
1369  		, unsigned cache_offset, std::vector<size_t> &bin_sizes, get_char getchar, get_length length, compare comp)
1370  	{
1371  		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
1372  		//Iterate to the end of the empties.  If all empty, return
1373  		while(length(*first) <= char_offset) {
1374  			if(++first == last)
1375  				return;
1376  		}
1377  		RandomAccessIter finish = last - 1;
1378  		//Getting the last non-empty
1379  		for(;length(*finish) <= char_offset; --finish) { }
1380  		++finish;
1381  		update_offset(first, finish, char_offset, getchar, length);
1382
1383  		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
1384  		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
1385  		const unsigned max_size = bin_count;
1386  		const unsigned membin_count = bin_count + 1;
1387  		unsigned cache_end;
1388  		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count) + 1;
1389
1390  		//Calculating the size of each bin; this takes roughly 10% of runtime
1391  		for (RandomAccessIter current = first; current != last; ++current) {
1392  			if(length(*current) <= char_offset) {
1393  				bin_sizes[0]++;
1394  			}
1395  			else
1396  				bin_sizes[getchar((*current), char_offset) + 1]++;
1397  		}
1398  		//Assign the bin positions
1399  		bin_cache[cache_offset] = first;
1400  		for(unsigned u = 0; u < membin_count - 1; u++)
1401  			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
1402
1403  		//Swap into place
1404  		RandomAccessIter nextbinstart = first;
1405  		//handling empty bins
1406  		RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
1407  		nextbinstart +=	bin_sizes[0];
1408  		RandomAccessIter * target_bin;
1409  		//Iterating over each element in the bin of empties
1410  		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1411  			//empties belong in this bin
1412  			while(length(*current) > char_offset) {
1413  				target_bin = bins + getchar((*current), char_offset);
1414  				iter_swap(current, (*target_bin)++);
1415  			}
1416  		}
1417  		*local_bin = nextbinstart;
1418  		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
1419  		unsigned last_bin = bin_count - 1;
1420  		for(; last_bin && !bin_sizes[last_bin + 1]; --last_bin) { }
1421  		//This dominates runtime, mostly in the swap and bin lookups
1422  		for(unsigned u = 0; u < last_bin; ++u) {
1423  			local_bin = bins + u;
1424  			nextbinstart += bin_sizes[u + 1];
1425  			//Iterating over each element in this bin
1426  			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1427  				//Swapping elements in current into place until the correct element has been swapped in
1428  				for(target_bin = bins + getchar((*current), char_offset);  target_bin != local_bin;
1429  					target_bin = bins + getchar((*current), char_offset))
1430  					iter_swap(current, (*target_bin)++);
1431  			}
1432  			*local_bin = nextbinstart;
1433  		}
1434  		bins[last_bin] = last;
1435
1436  		//Recursing
1437  		RandomAccessIter lastPos = bin_cache[cache_offset];
1438  		//Skip this loop for empties
1439  		for(unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2; lastPos = bin_cache[u], ++u) {
1440  			size_t count = bin_cache[u] - lastPos;
1441  			//don't sort unless there are at least two items to compare
1442  			if(count < 2)
1443  				continue;
1444  			//using std::sort if its worst-case is better
1445  			if(count < max_size)
1446  				std::sort(lastPos, bin_cache[u], comp);
1447  			else
1448  				string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length, compare>(lastPos
1449  					, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes, getchar, length, comp);
1450  		}
1451  	}
1452
1453  	//Sorts strings in reverse order, with empties at the end
1454  	template <class RandomAccessIter, class data_type, class unsignedchar_type, class get_char, class get_length, class compare>
1455  	inline void
1456  	reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
1457  		, unsigned cache_offset, std::vector<size_t> &bin_sizes, get_char getchar, get_length length, compare comp)
1458  	{
1459  		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
1460  		RandomAccessIter curr = first;
1461  		//Iterate to the end of the empties.  If all empty, return
1462  		while(length(*curr) <= char_offset) {
1463  			if(++curr == last)
1464  				return;
1465  		}
1466  		//Getting the last non-empty
1467  		while(length(*(--last)) <= char_offset) { }
1468  		++last;
1469  		//Offsetting on identical characters.  This section works a character at a time for optimal worst-case performance.
1470  		update_offset(first, last, char_offset, getchar, length);
1471
1472  		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
1473  		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
1474  		const unsigned max_size = bin_count;
1475  		const unsigned membin_count = bin_count + 1;
1476  		const unsigned max_bin = bin_count - 1;
1477  		unsigned cache_end;
1478  		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count);
1479  		RandomAccessIter *end_bin = &(bin_cache[cache_offset + max_bin]);
1480
1481  		//Calculating the size of each bin; this takes roughly 10% of runtime
1482  		for (RandomAccessIter current = first; current != last; ++current) {
1483  			if(length(*current) <= char_offset) {
1484  				bin_sizes[bin_count]++;
1485  			}
1486  			else
1487  				bin_sizes[max_bin - getchar((*current), char_offset)]++;
1488  		}
1489  		//Assign the bin positions
1490  		bin_cache[cache_offset] = first;
1491  		for(unsigned u = 0; u < membin_count - 1; u++)
1492  			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
1493
1494  		//Swap into place
1495  		RandomAccessIter nextbinstart = last;
1496  		//handling empty bins
1497  		RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
1498  		RandomAccessIter lastFull = *local_bin;
1499  		RandomAccessIter * target_bin;
1500  		//Iterating over each element in the bin of empties
1501  		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1502  			//empties belong in this bin
1503  			while(length(*current) > char_offset) {
1504  				target_bin = end_bin - getchar((*current), char_offset);
1505  				iter_swap(current, (*target_bin)++);
1506  			}
1507  		}
1508  		*local_bin = nextbinstart;
1509  		nextbinstart = first;
1510  		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
1511  		unsigned last_bin = max_bin;
1512  		for(; last_bin && !bin_sizes[last_bin]; --last_bin) { }
1513  		//This dominates runtime, mostly in the swap and bin lookups
1514  		for(unsigned u = 0; u < last_bin; ++u) {
1515  			local_bin = bins + u;
1516  			nextbinstart += bin_sizes[u];
1517  			//Iterating over each element in this bin
1518  			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
1519  				//Swapping elements in current into place until the correct element has been swapped in
1520  				for(target_bin = end_bin - getchar((*current), char_offset);  target_bin != local_bin;
1521  					target_bin = end_bin - getchar((*current), char_offset))
1522  					iter_swap(current, (*target_bin)++);
1523  			}
1524  			*local_bin = nextbinstart;
1525  		}
1526  		bins[last_bin] = lastFull;
1527  		//Recursing
1528  		RandomAccessIter lastPos = first;
1529  		//Skip this loop for empties
1530  		for(unsigned u = cache_offset; u <= cache_offset + last_bin; lastPos = bin_cache[u], ++u) {
1531  			size_t count = bin_cache[u] - lastPos;
1532  			//don't sort unless there are at least two items to compare
1533  			if(count < 2)
1534  				continue;
1535  			//using std::sort if its worst-case is better
1536  			if(count < max_size)
1537  				std::sort(lastPos, bin_cache[u], comp);
1538  			else
1539  				reverse_string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length, compare>(lastPos
1540  					, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes, getchar, length, comp);
1541  		}
1542  	}
1543
1544  	//Holds the bin vector and makes the initial recursive call
1545  	template <class RandomAccessIter, class data_type, class unsignedchar_type>
1546  	inline void
1547  	string_sort(RandomAccessIter first, RandomAccessIter last, data_type, unsignedchar_type)
1548  	{
1549  		std::vector<size_t> bin_sizes;
1550  		std::vector<RandomAccessIter> bin_cache;
1551  		string_sort_rec<RandomAccessIter, data_type, unsignedchar_type>(first, last, 0, bin_cache, 0, bin_sizes);
1552  	}
1553
1554  	//Holds the bin vector and makes the initial recursive call
1555  	template <class RandomAccessIter, class data_type, class unsignedchar_type>
1556  	inline void
1557  	reverse_string_sort(RandomAccessIter first, RandomAccessIter last, data_type, unsignedchar_type)
1558  	{
1559  		std::vector<size_t> bin_sizes;
1560  		std::vector<RandomAccessIter> bin_cache;
1561  		reverse_string_sort_rec<RandomAccessIter, data_type, unsignedchar_type>(first, last, 0, bin_cache, 0, bin_sizes);
1562  	}
1563
1564  	//Holds the bin vector and makes the initial recursive call
1565  	template <class RandomAccessIter, class get_char, class get_length, class data_type, class unsignedchar_type>
1566  	inline void
1567  	string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, data_type, unsignedchar_type)
1568  	{
1569  		std::vector<size_t> bin_sizes;
1570  		std::vector<RandomAccessIter> bin_cache;
1571  		string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length>(first, last, 0, bin_cache, 0, bin_sizes, getchar, length);
1572  	}
1573
1574  	//Holds the bin vector and makes the initial recursive call
1575  	template <class RandomAccessIter, class get_char, class get_length, class compare, class data_type, class unsignedchar_type>
1576  	inline void
1577  	string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, compare comp, data_type, unsignedchar_type)
1578  	{
1579  		std::vector<size_t> bin_sizes;
1580  		std::vector<RandomAccessIter> bin_cache;
1581  		string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length, compare>(first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp);
1582  	}
1583
1584  	//Holds the bin vector and makes the initial recursive call
1585  	template <class RandomAccessIter, class get_char, class get_length, class compare, class data_type, class unsignedchar_type>
1586  	inline void
1587  	reverse_string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, compare comp, data_type, unsignedchar_type)
1588  	{
1589  		std::vector<size_t> bin_sizes;
1590  		std::vector<RandomAccessIter> bin_cache;
1591  		reverse_string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length, compare>(first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp);
1592  	}
1593  }
1594
1595  //Allows character-type overloads
1596  template <class RandomAccessIter, class unsignedchar_type>
1597  inline void string_sort(RandomAccessIter first, RandomAccessIter last, unsignedchar_type unused)
1598  {
1599  	//Don't sort if it's too small to optimize
1600  	if(last - first < detail::MIN_SORT_SIZE)
1601  		std::sort(first, last);
1602  	else
1603  		detail::string_sort(first, last, *first, unused);
1604  }
1605
1606  //Top-level sorting call; wraps using default of unsigned char
1607  template <class RandomAccessIter>
1608  inline void string_sort(RandomAccessIter first, RandomAccessIter last)
1609  {
1610  	unsigned char unused = '\0';
1611  	string_sort(first, last, unused);
1612  }
1613
1614  //Allows character-type overloads
1615  template <class RandomAccessIter, class compare, class unsignedchar_type>
1616  inline void reverse_string_sort(RandomAccessIter first, RandomAccessIter last, compare comp, unsignedchar_type unused)
1617  {
1618  	//Don't sort if it's too small to optimize
1619  	if(last - first < detail::MIN_SORT_SIZE)
1620  		std::sort(first, last, comp);
1621  	else
1622  		detail::reverse_string_sort(first, last, *first, unused);
1623  }
1624
1625  //Top-level sorting call; wraps using default of unsigned char
1626  template <class RandomAccessIter, class compare>
1627  inline void reverse_string_sort(RandomAccessIter first, RandomAccessIter last, compare comp)
1628  {
1629  	unsigned char unused = '\0';
1630  	reverse_string_sort(first, last, comp, unused);
1631  }
1632
1633  template <class RandomAccessIter, class get_char, class get_length>
1634  inline void string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length)
1635  {
1636  	//Don't sort if it's too small to optimize
1637  	if(last - first < detail::MIN_SORT_SIZE)
1638  		std::sort(first, last);
1639  	else {
1640  		//skipping past empties at the beginning, which allows us to get the character type
1641  		//.empty() is not used so as not to require a user declaration of it
1642  		while(!length(*first)) {
1643  			if(++first == last)
1644  				return;
1645  		}
1646  		detail::string_sort(first, last, getchar, length, *first, getchar((*first), 0));
1647  	}
1648  }
1649
1650  template <class RandomAccessIter, class get_char, class get_length, class compare>
1651  inline void string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, compare comp)
1652  {
1653  	//Don't sort if it's too small to optimize
1654  	if(last - first < detail::MIN_SORT_SIZE)
1655  		std::sort(first, last, comp);
1656  	else {
1657  		//skipping past empties at the beginning, which allows us to get the character type
1658  		//.empty() is not used so as not to require a user declaration of it
1659  		while(!length(*first)) {
1660  			if(++first == last)
1661  				return;
1662  		}
1663  		detail::string_sort(first, last, getchar, length, comp, *first, getchar((*first), 0));
1664  	}
1665  }
1666
1667  template <class RandomAccessIter, class get_char, class get_length, class compare>
1668  inline void reverse_string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, compare comp)
1669  {
1670  	//Don't sort if it's too small to optimize
1671  	if(last - first < detail::MIN_SORT_SIZE)
1672  		std::sort(first, last, comp);
1673  	else {
1674  		//skipping past empties at the beginning, which allows us to get the character type
1675  		//.empty() is not used so as not to require a user declaration of it
1676  		while(!length(*(--last))) {
1677  			//Note: if there is just one non-empty, and it's at the beginning, then it's already in sorted order
1678  			if(first == last)
1679  				return;
1680  		}
1681  		//making last just after the end of the non-empty part of the array
1682  		++last;
1683  		detail::reverse_string_sort(first, last, getchar, length, comp, *first, getchar((*first), 0));
1684  	}
1685  }
1686}
1687
1688#endif
1689