sort.c revision 3a9241add947cb6d24b5de7a8927517426a78795
1/* vi: set sw=4 ts=4:
2 *
3 * sort.c - put input lines into order
4 *
5 * Copyright 2004, 2008 Rob Landley <rob@landley.net>
6 *
7 * See http://www.opengroup.org/onlinepubs/007904975/utilities/sort.html
8
9USE_SORT(NEWTOY(sort, USE_SORT_FLOAT("g")USE_SORT_BIG("S:T:m" "o:k*t:xbMcszdfi") "run", TOYFLAG_USR|TOYFLAG_BIN))
10
11config SORT
12    bool "sort"
13    default y
14    help
15      usage: sort [-run] [FILE...]
16
17      Sort all lines of text from input files (or stdin) to stdout.
18
19      -r    reverse
20      -u    unique lines only
21      -n    numeric order (instead of alphabetical)
22
23config SORT_BIG
24    bool "SuSv3 options (Support -ktcsbdfiozM)"
25    default y
26    depends on SORT
27    help
28      usage: sort [-bcdfiMsz] [-k#[,#[x]] [-t X]] [-o FILE]
29
30      -b    ignore leading blanks (or trailing blanks in second part of key)
31      -c    check whether input is sorted
32      -d    dictionary order (use alphanumeric and whitespace chars only)
33      -f    force uppercase (case insensitive sort)
34      -i    ignore nonprinting characters
35      -M    month sort (jan, feb, etc).
36      -x    Hexadecimal numerical sort
37      -s    skip fallback sort (only sort with keys)
38      -z    zero (null) terminated input
39      -k    sort by "key" (see below)
40      -t    use a key separator other than whitespace
41      -o    output to FILE instead of stdout
42
43      Sorting by key looks at a subset of the words on each line.  -k2
44      uses the second word to the end of the line, -k2,2 looks at only
45      the second word, -k2,4 looks from the start of the second to the end
46      of the fourth word.  Specifying multiple keys uses the later keys as
47      tie breakers, in order.  A type specifier appended to a sort key
48          (such as -2,2n) applies only to sorting that key.
49
50config SORT_FLOAT
51    bool "Floating point (-g)"
52    default y
53    depends on SORT_BIG
54    help
55      usage: sort [-g]
56
57      This version of sort requires floating point.
58
59      -g    general numeric sort (double precision with nan and inf)
60
61*/
62
63#include "toys.h"
64
65DEFINE_GLOBALS(
66    char *key_separator;
67    struct arg_list *raw_keys;
68    char *outfile;
69    char *ignore1, ignore2;   // GNU compatability NOPs for -S and -T.
70
71    void *key_list;
72    int linecount;
73    char **lines;
74)
75
76#define TT this.sort
77
78// The sort types are n, g, and M.
79// u, c, s, and z apply to top level only, not to keys.
80// b at top level implies bb.
81// The remaining options can be applied to search keys.
82
83#define FLAG_n  (1<<0)  // Sort type: numeric
84#define FLAG_u  (1<<1)  // Unique
85#define FLAG_r  (1<<2)  // Reverse output order
86
87#define FLAG_i  (1<<3)  // Ignore !isprint()
88#define FLAG_f  (1<<4)  // Force uppercase
89#define FLAG_d  (1<<5)  // Ignore !(isalnum()|isspace())
90#define FLAG_z  (1<<6)  // Input is null terminated, not \n
91#define FLAG_s  (1<<7)  // Stable sort, no ascii fallback at end
92#define FLAG_c  (1<<8)  // Check only.  No output, exit(!ordered)
93#define FLAG_M  (1<<9)  // Sort type: date
94#define FLAG_b  (1<<10) // Ignore leading blanks
95#define FLAG_x  (1<<11) // Hex sort
96#define FLAG_g  (1<<18) // Sort type: strtod()
97
98// Left off dealing with FLAG_b/FLAG_bb logic...
99
100#define FLAG_bb (1<<31)  // Ignore trailing blanks
101
102struct sort_key
103{
104    struct sort_key *next_key;  // linked list
105    unsigned range[4];          // start word, start char, end word, end char
106    int flags;
107};
108
109// Copy of the part of this string corresponding to a key/flags.
110
111static char *get_key_data(char *str, struct sort_key *key, int flags)
112{
113    int start=0, end, len, i, j;
114
115    // Special case whole string, so we don't have to make a copy
116
117    if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3]
118        && !(flags&(FLAG_b&FLAG_d&FLAG_f&FLAG_i&FLAG_bb))) return str;
119
120    // Find start of key on first pass, end on second pass
121
122    len = strlen(str);
123    for (j=0; j<2; j++) {
124        if (!key->range[2*j]) end=len;
125
126        // Loop through fields
127        else {
128            end=0;
129            for (i=1; i < key->range[2*j]+j; i++) {
130
131                // Skip leading blanks
132                if (str[end] && !TT.key_separator)
133                    while (isspace(str[end])) end++;
134
135                // Skip body of key
136                for (; str[end]; end++) {
137                    if (TT.key_separator) {
138                        if (str[end]==*TT.key_separator) break;
139                    } else if (isspace(str[end])) break;
140                }
141            }
142        }
143        if (!j) start=end;
144    }
145
146    // Key with explicit separator starts after the separator
147    if (TT.key_separator && str[start]==*TT.key_separator) start++;
148
149    // Strip leading and trailing whitespace if necessary
150    if (flags&FLAG_b) while (isspace(str[start])) start++;
151    if (flags&FLAG_bb) while (end>start && isspace(str[end-1])) end--;
152
153    // Handle offsets on start and end
154    if (key->range[3]) {
155        end += key->range[3]-1;
156        if (end>len) end=len;
157    }
158    if (key->range[1]) {
159        start += key->range[1]-1;
160        if (start>len) start=len;
161    }
162
163    // Make the copy
164    if (end<start) end=start;
165    str = xstrndup(str+start, end-start);
166
167    // Handle -d
168    if (flags&FLAG_d) {
169        for (start = end = 0; str[end]; end++)
170            if (isspace(str[end]) || isalnum(str[end])) str[start++] = str[end];
171        str[start] = 0;
172    }
173
174    // Handle -i
175    if (flags&FLAG_i) {
176        for (start = end = 0; str[end]; end++)
177            if (isprint(str[end])) str[start++] = str[end];
178        str[start] = 0;
179    }
180
181    // Handle -f
182    if (flags*FLAG_f) for(i=0; str[i]; i++) str[i] = toupper(str[i]);
183
184    return str;
185}
186
187// append a sort_key to key_list.
188
189static struct sort_key *add_key(void)
190{
191    void **stupid_compiler = &TT.key_list;
192    struct sort_key **pkey = (struct sort_key **)stupid_compiler;
193
194    while (*pkey) pkey = &((*pkey)->next_key);
195    return *pkey = xzalloc(sizeof(struct sort_key));
196}
197
198// Perform actual comparison
199static int compare_values(int flags, char *x, char *y)
200{
201    int ff = flags & (FLAG_n|FLAG_g|FLAG_M|FLAG_x);
202
203    // Ascii sort
204    if (!ff) return strcmp(x, y);
205
206    if (CFG_SORT_FLOAT && ff == FLAG_g) {
207        char *xx,*yy;
208        double dx = strtod(x,&xx), dy = strtod(y,&yy);
209        int xinf, yinf;
210
211        // not numbers < NaN < -infinity < numbers < +infinity
212
213        if (x==xx) return y==yy ? 0 : -1;
214        if (y==yy) return 1;
215
216        // Check for isnan
217        if (dx!=dx) return (dy!=dy) ? 0 : -1;
218        if (dy!=dy) return 1;
219
220        // Check for infinity.  (Could underflow, but avoids needing libm.)
221        xinf = (1.0/dx == 0.0);
222        yinf = (1.0/dy == 0.0);
223        if (xinf) {
224            if(dx<0) return (yinf && dy<0) ? 0 : -1;
225            return (yinf && dy>0) ? 0 : 1;
226        }
227        if (yinf) return dy<0 ? 1 : -1;
228
229        return dx>dy ? 1 : (dx<dy ? -1 : 0);
230    } else if (CFG_SORT_BIG && ff == FLAG_M) {
231        struct tm thyme;
232        int dx;
233        char *xx,*yy;
234
235        xx = strptime(x,"%b",&thyme);
236        dx = thyme.tm_mon;
237        yy = strptime(y,"%b",&thyme);
238        if (!xx) return !yy ? 0 : -1;
239        else if (!yy) return 1;
240        else return dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon;
241
242    } else if (CFG_SORT_BIG && ff == FLAG_x) {
243        return strtol(x, NULL, 16)-strtol(y, NULL, 16);
244    // This has to be ff == FLAG_n
245    } else {
246        // Full floating point version of -n
247        if (CFG_SORT_FLOAT) {
248            double dx = atof(x), dy = atof(y);
249
250            return dx>dy ? 1 : (dx<dy ? -1 : 0);
251        // Integer version of -n for tiny systems
252        } else return atoi(x)-atoi(y);
253    }
254}
255
256
257// Callback from qsort(): Iterate through key_list and perform comparisons.
258static int compare_keys(const void *xarg, const void *yarg)
259{
260    int flags = toys.optflags, retval = 0;
261    char *x, *y, *xx = *(char **)xarg, *yy = *(char **)yarg;
262    struct sort_key *key;
263
264    if (CFG_SORT_BIG) {
265        for (key=(struct sort_key *)TT.key_list; !retval && key;
266             key = key->next_key)
267        {
268            flags = key->flags ? key->flags : toys.optflags;
269
270            // Chop out and modify key chunks, handling -dfib
271
272            x = get_key_data(xx, key, flags);
273            y = get_key_data(yy, key, flags);
274
275            retval = compare_values(flags, x, y);
276
277            // Free the copies get_key_data() made.
278
279            if (x != xx) free(x);
280            if (y != yy) free(y);
281
282            if (retval) break;
283        }
284    } else retval = compare_values(flags, xx, yy);
285
286    // Perform fallback sort if necessary
287    if (!retval && !(CFG_SORT_BIG && (toys.optflags&FLAG_s))) {
288        retval = strcmp(xx, yy);
289        flags = toys.optflags;
290    }
291
292    return retval * ((flags&FLAG_r) ? -1 : 1);
293}
294
295// Callback from loopfiles to handle input files.
296static void sort_read(int fd, char *name)
297{
298    // Read each line from file, appending to a big array.
299
300    for (;;) {
301        char * line = (CFG_SORT_BIG && (toys.optflags&FLAG_z))
302                       ? get_rawline(fd, NULL, 0) : get_line(fd);
303
304        if (!line) break;
305
306        // handle -c here so we don't allocate more memory than necessary.
307        if (CFG_SORT_BIG && (toys.optflags&FLAG_c)) {
308            int j = (toys.optflags&FLAG_u) ? -1 : 0;
309
310            if (TT.lines && compare_keys((void *)&TT.lines, &line)>j)
311                error_exit("%s: Check line %d\n", name, TT.linecount);
312            free(TT.lines);
313            TT.lines = (char **)line;
314        } else {
315            if (!(TT.linecount&63))
316                TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64));
317            TT.lines[TT.linecount] = line;
318        }
319        TT.linecount++;
320    }
321}
322
323void sort_main(void)
324{
325    int idx, fd = 1;
326
327    // Open output file if necessary.
328    if (CFG_SORT_BIG && TT.outfile)
329        fd = xcreate(TT.outfile, O_CREAT|O_TRUNC|O_WRONLY, 0666);
330
331    // Parse -k sort keys.
332    if (CFG_SORT_BIG && TT.raw_keys) {
333        struct arg_list *arg;
334
335        for (arg = TT.raw_keys; arg; arg = arg->next) {
336            struct sort_key *key = add_key();
337            char *temp;
338            int flag;
339
340            idx = 0;
341            temp = arg->arg;
342            while (*temp) {
343                // Start of range
344                key->range[2*idx] = (unsigned)strtol(temp, &temp, 10);
345                if (*temp=='.')
346                    key->range[(2*idx)+1] = (unsigned)strtol(temp+1, &temp, 10);
347
348                // Handle flags appended to a key type.
349                for (;*temp;temp++) {
350                    char *temp2, *optlist;
351
352                    // Note that a second comma becomes an "Unknown key" error.
353
354                    if (*temp==',' && !idx++) {
355                        temp++;
356                        break;
357                    }
358
359                    // Which flag is this?
360
361                    optlist = toys.which->options;
362                    temp2 = strchr(optlist, *temp);
363                    flag = (1<<(optlist-temp2+strlen(optlist)-1));
364
365                    // Was it a flag that can apply to a key?
366
367                    if (!temp2 || flag>FLAG_b
368                        || (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z)))
369                    {
370                        error_exit("Unknown key option.");
371                    }
372                    // b after , means strip _trailing_ space, not leading.
373                    if (idx && flag==FLAG_b) flag = FLAG_bb;
374                    key->flags |= flag;
375                }
376            }
377        }
378    }
379
380    // global b flag strips both leading and trailing spaces
381    if (toys.optflags&FLAG_b) toys.optflags |= FLAG_bb;
382
383    // If no keys, perform alphabetic sort over the whole line.
384    if (CFG_SORT_BIG && !TT.key_list) add_key()->range[0] = 1;
385
386    // Open input files and read data, populating TT.lines[TT.linecount]
387    loopfiles(toys.optargs, sort_read);
388
389    // The compare (-c) logic was handled in sort_read(),
390    // so if we got here, we're done.
391    if (CFG_SORT_BIG && (toys.optflags&FLAG_c)) goto exit_now;
392
393    // Perform the actual sort
394    qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys);
395
396    // handle unique (-u)
397    if (toys.optflags&FLAG_u) {
398        int jdx;
399
400        for (jdx=0, idx=1; idx<TT.linecount; idx++) {
401            if (!compare_keys(&TT.lines[jdx], &TT.lines[idx]))
402                free(TT.lines[idx]);
403            else TT.lines[++jdx] = TT.lines[idx];
404        }
405        if (TT.linecount) TT.linecount = jdx+1;
406    }
407
408    // Output result
409    for (idx = 0; idx<TT.linecount; idx++) {
410        char *s = TT.lines[idx];
411        xwrite(fd, s, strlen(s));
412        if (CFG_TOYBOX_FREE) free(s);
413        xwrite(fd, "\n", 1);
414    }
415
416exit_now:
417    if (CFG_TOYBOX_FREE) {
418      if (fd != 1) close(fd);
419      free(TT.lines);
420    }
421}
422