sort.c revision 3a9241add947cb6d24b5de7a8927517426a78795
1/* vi: set sw=4 ts=4: 2 * 3 * sort.c - put input lines into order 4 * 5 * Copyright 2004, 2008 Rob Landley <rob@landley.net> 6 * 7 * See http://www.opengroup.org/onlinepubs/007904975/utilities/sort.html 8 9USE_SORT(NEWTOY(sort, USE_SORT_FLOAT("g")USE_SORT_BIG("S:T:m" "o:k*t:xbMcszdfi") "run", TOYFLAG_USR|TOYFLAG_BIN)) 10 11config SORT 12 bool "sort" 13 default y 14 help 15 usage: sort [-run] [FILE...] 16 17 Sort all lines of text from input files (or stdin) to stdout. 18 19 -r reverse 20 -u unique lines only 21 -n numeric order (instead of alphabetical) 22 23config SORT_BIG 24 bool "SuSv3 options (Support -ktcsbdfiozM)" 25 default y 26 depends on SORT 27 help 28 usage: sort [-bcdfiMsz] [-k#[,#[x]] [-t X]] [-o FILE] 29 30 -b ignore leading blanks (or trailing blanks in second part of key) 31 -c check whether input is sorted 32 -d dictionary order (use alphanumeric and whitespace chars only) 33 -f force uppercase (case insensitive sort) 34 -i ignore nonprinting characters 35 -M month sort (jan, feb, etc). 36 -x Hexadecimal numerical sort 37 -s skip fallback sort (only sort with keys) 38 -z zero (null) terminated input 39 -k sort by "key" (see below) 40 -t use a key separator other than whitespace 41 -o output to FILE instead of stdout 42 43 Sorting by key looks at a subset of the words on each line. -k2 44 uses the second word to the end of the line, -k2,2 looks at only 45 the second word, -k2,4 looks from the start of the second to the end 46 of the fourth word. Specifying multiple keys uses the later keys as 47 tie breakers, in order. A type specifier appended to a sort key 48 (such as -2,2n) applies only to sorting that key. 49 50config SORT_FLOAT 51 bool "Floating point (-g)" 52 default y 53 depends on SORT_BIG 54 help 55 usage: sort [-g] 56 57 This version of sort requires floating point. 58 59 -g general numeric sort (double precision with nan and inf) 60 61*/ 62 63#include "toys.h" 64 65DEFINE_GLOBALS( 66 char *key_separator; 67 struct arg_list *raw_keys; 68 char *outfile; 69 char *ignore1, ignore2; // GNU compatability NOPs for -S and -T. 70 71 void *key_list; 72 int linecount; 73 char **lines; 74) 75 76#define TT this.sort 77 78// The sort types are n, g, and M. 79// u, c, s, and z apply to top level only, not to keys. 80// b at top level implies bb. 81// The remaining options can be applied to search keys. 82 83#define FLAG_n (1<<0) // Sort type: numeric 84#define FLAG_u (1<<1) // Unique 85#define FLAG_r (1<<2) // Reverse output order 86 87#define FLAG_i (1<<3) // Ignore !isprint() 88#define FLAG_f (1<<4) // Force uppercase 89#define FLAG_d (1<<5) // Ignore !(isalnum()|isspace()) 90#define FLAG_z (1<<6) // Input is null terminated, not \n 91#define FLAG_s (1<<7) // Stable sort, no ascii fallback at end 92#define FLAG_c (1<<8) // Check only. No output, exit(!ordered) 93#define FLAG_M (1<<9) // Sort type: date 94#define FLAG_b (1<<10) // Ignore leading blanks 95#define FLAG_x (1<<11) // Hex sort 96#define FLAG_g (1<<18) // Sort type: strtod() 97 98// Left off dealing with FLAG_b/FLAG_bb logic... 99 100#define FLAG_bb (1<<31) // Ignore trailing blanks 101 102struct sort_key 103{ 104 struct sort_key *next_key; // linked list 105 unsigned range[4]; // start word, start char, end word, end char 106 int flags; 107}; 108 109// Copy of the part of this string corresponding to a key/flags. 110 111static char *get_key_data(char *str, struct sort_key *key, int flags) 112{ 113 int start=0, end, len, i, j; 114 115 // Special case whole string, so we don't have to make a copy 116 117 if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3] 118 && !(flags&(FLAG_b&FLAG_d&FLAG_f&FLAG_i&FLAG_bb))) return str; 119 120 // Find start of key on first pass, end on second pass 121 122 len = strlen(str); 123 for (j=0; j<2; j++) { 124 if (!key->range[2*j]) end=len; 125 126 // Loop through fields 127 else { 128 end=0; 129 for (i=1; i < key->range[2*j]+j; i++) { 130 131 // Skip leading blanks 132 if (str[end] && !TT.key_separator) 133 while (isspace(str[end])) end++; 134 135 // Skip body of key 136 for (; str[end]; end++) { 137 if (TT.key_separator) { 138 if (str[end]==*TT.key_separator) break; 139 } else if (isspace(str[end])) break; 140 } 141 } 142 } 143 if (!j) start=end; 144 } 145 146 // Key with explicit separator starts after the separator 147 if (TT.key_separator && str[start]==*TT.key_separator) start++; 148 149 // Strip leading and trailing whitespace if necessary 150 if (flags&FLAG_b) while (isspace(str[start])) start++; 151 if (flags&FLAG_bb) while (end>start && isspace(str[end-1])) end--; 152 153 // Handle offsets on start and end 154 if (key->range[3]) { 155 end += key->range[3]-1; 156 if (end>len) end=len; 157 } 158 if (key->range[1]) { 159 start += key->range[1]-1; 160 if (start>len) start=len; 161 } 162 163 // Make the copy 164 if (end<start) end=start; 165 str = xstrndup(str+start, end-start); 166 167 // Handle -d 168 if (flags&FLAG_d) { 169 for (start = end = 0; str[end]; end++) 170 if (isspace(str[end]) || isalnum(str[end])) str[start++] = str[end]; 171 str[start] = 0; 172 } 173 174 // Handle -i 175 if (flags&FLAG_i) { 176 for (start = end = 0; str[end]; end++) 177 if (isprint(str[end])) str[start++] = str[end]; 178 str[start] = 0; 179 } 180 181 // Handle -f 182 if (flags*FLAG_f) for(i=0; str[i]; i++) str[i] = toupper(str[i]); 183 184 return str; 185} 186 187// append a sort_key to key_list. 188 189static struct sort_key *add_key(void) 190{ 191 void **stupid_compiler = &TT.key_list; 192 struct sort_key **pkey = (struct sort_key **)stupid_compiler; 193 194 while (*pkey) pkey = &((*pkey)->next_key); 195 return *pkey = xzalloc(sizeof(struct sort_key)); 196} 197 198// Perform actual comparison 199static int compare_values(int flags, char *x, char *y) 200{ 201 int ff = flags & (FLAG_n|FLAG_g|FLAG_M|FLAG_x); 202 203 // Ascii sort 204 if (!ff) return strcmp(x, y); 205 206 if (CFG_SORT_FLOAT && ff == FLAG_g) { 207 char *xx,*yy; 208 double dx = strtod(x,&xx), dy = strtod(y,&yy); 209 int xinf, yinf; 210 211 // not numbers < NaN < -infinity < numbers < +infinity 212 213 if (x==xx) return y==yy ? 0 : -1; 214 if (y==yy) return 1; 215 216 // Check for isnan 217 if (dx!=dx) return (dy!=dy) ? 0 : -1; 218 if (dy!=dy) return 1; 219 220 // Check for infinity. (Could underflow, but avoids needing libm.) 221 xinf = (1.0/dx == 0.0); 222 yinf = (1.0/dy == 0.0); 223 if (xinf) { 224 if(dx<0) return (yinf && dy<0) ? 0 : -1; 225 return (yinf && dy>0) ? 0 : 1; 226 } 227 if (yinf) return dy<0 ? 1 : -1; 228 229 return dx>dy ? 1 : (dx<dy ? -1 : 0); 230 } else if (CFG_SORT_BIG && ff == FLAG_M) { 231 struct tm thyme; 232 int dx; 233 char *xx,*yy; 234 235 xx = strptime(x,"%b",&thyme); 236 dx = thyme.tm_mon; 237 yy = strptime(y,"%b",&thyme); 238 if (!xx) return !yy ? 0 : -1; 239 else if (!yy) return 1; 240 else return dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon; 241 242 } else if (CFG_SORT_BIG && ff == FLAG_x) { 243 return strtol(x, NULL, 16)-strtol(y, NULL, 16); 244 // This has to be ff == FLAG_n 245 } else { 246 // Full floating point version of -n 247 if (CFG_SORT_FLOAT) { 248 double dx = atof(x), dy = atof(y); 249 250 return dx>dy ? 1 : (dx<dy ? -1 : 0); 251 // Integer version of -n for tiny systems 252 } else return atoi(x)-atoi(y); 253 } 254} 255 256 257// Callback from qsort(): Iterate through key_list and perform comparisons. 258static int compare_keys(const void *xarg, const void *yarg) 259{ 260 int flags = toys.optflags, retval = 0; 261 char *x, *y, *xx = *(char **)xarg, *yy = *(char **)yarg; 262 struct sort_key *key; 263 264 if (CFG_SORT_BIG) { 265 for (key=(struct sort_key *)TT.key_list; !retval && key; 266 key = key->next_key) 267 { 268 flags = key->flags ? key->flags : toys.optflags; 269 270 // Chop out and modify key chunks, handling -dfib 271 272 x = get_key_data(xx, key, flags); 273 y = get_key_data(yy, key, flags); 274 275 retval = compare_values(flags, x, y); 276 277 // Free the copies get_key_data() made. 278 279 if (x != xx) free(x); 280 if (y != yy) free(y); 281 282 if (retval) break; 283 } 284 } else retval = compare_values(flags, xx, yy); 285 286 // Perform fallback sort if necessary 287 if (!retval && !(CFG_SORT_BIG && (toys.optflags&FLAG_s))) { 288 retval = strcmp(xx, yy); 289 flags = toys.optflags; 290 } 291 292 return retval * ((flags&FLAG_r) ? -1 : 1); 293} 294 295// Callback from loopfiles to handle input files. 296static void sort_read(int fd, char *name) 297{ 298 // Read each line from file, appending to a big array. 299 300 for (;;) { 301 char * line = (CFG_SORT_BIG && (toys.optflags&FLAG_z)) 302 ? get_rawline(fd, NULL, 0) : get_line(fd); 303 304 if (!line) break; 305 306 // handle -c here so we don't allocate more memory than necessary. 307 if (CFG_SORT_BIG && (toys.optflags&FLAG_c)) { 308 int j = (toys.optflags&FLAG_u) ? -1 : 0; 309 310 if (TT.lines && compare_keys((void *)&TT.lines, &line)>j) 311 error_exit("%s: Check line %d\n", name, TT.linecount); 312 free(TT.lines); 313 TT.lines = (char **)line; 314 } else { 315 if (!(TT.linecount&63)) 316 TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64)); 317 TT.lines[TT.linecount] = line; 318 } 319 TT.linecount++; 320 } 321} 322 323void sort_main(void) 324{ 325 int idx, fd = 1; 326 327 // Open output file if necessary. 328 if (CFG_SORT_BIG && TT.outfile) 329 fd = xcreate(TT.outfile, O_CREAT|O_TRUNC|O_WRONLY, 0666); 330 331 // Parse -k sort keys. 332 if (CFG_SORT_BIG && TT.raw_keys) { 333 struct arg_list *arg; 334 335 for (arg = TT.raw_keys; arg; arg = arg->next) { 336 struct sort_key *key = add_key(); 337 char *temp; 338 int flag; 339 340 idx = 0; 341 temp = arg->arg; 342 while (*temp) { 343 // Start of range 344 key->range[2*idx] = (unsigned)strtol(temp, &temp, 10); 345 if (*temp=='.') 346 key->range[(2*idx)+1] = (unsigned)strtol(temp+1, &temp, 10); 347 348 // Handle flags appended to a key type. 349 for (;*temp;temp++) { 350 char *temp2, *optlist; 351 352 // Note that a second comma becomes an "Unknown key" error. 353 354 if (*temp==',' && !idx++) { 355 temp++; 356 break; 357 } 358 359 // Which flag is this? 360 361 optlist = toys.which->options; 362 temp2 = strchr(optlist, *temp); 363 flag = (1<<(optlist-temp2+strlen(optlist)-1)); 364 365 // Was it a flag that can apply to a key? 366 367 if (!temp2 || flag>FLAG_b 368 || (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z))) 369 { 370 error_exit("Unknown key option."); 371 } 372 // b after , means strip _trailing_ space, not leading. 373 if (idx && flag==FLAG_b) flag = FLAG_bb; 374 key->flags |= flag; 375 } 376 } 377 } 378 } 379 380 // global b flag strips both leading and trailing spaces 381 if (toys.optflags&FLAG_b) toys.optflags |= FLAG_bb; 382 383 // If no keys, perform alphabetic sort over the whole line. 384 if (CFG_SORT_BIG && !TT.key_list) add_key()->range[0] = 1; 385 386 // Open input files and read data, populating TT.lines[TT.linecount] 387 loopfiles(toys.optargs, sort_read); 388 389 // The compare (-c) logic was handled in sort_read(), 390 // so if we got here, we're done. 391 if (CFG_SORT_BIG && (toys.optflags&FLAG_c)) goto exit_now; 392 393 // Perform the actual sort 394 qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys); 395 396 // handle unique (-u) 397 if (toys.optflags&FLAG_u) { 398 int jdx; 399 400 for (jdx=0, idx=1; idx<TT.linecount; idx++) { 401 if (!compare_keys(&TT.lines[jdx], &TT.lines[idx])) 402 free(TT.lines[idx]); 403 else TT.lines[++jdx] = TT.lines[idx]; 404 } 405 if (TT.linecount) TT.linecount = jdx+1; 406 } 407 408 // Output result 409 for (idx = 0; idx<TT.linecount; idx++) { 410 char *s = TT.lines[idx]; 411 xwrite(fd, s, strlen(s)); 412 if (CFG_TOYBOX_FREE) free(s); 413 xwrite(fd, "\n", 1); 414 } 415 416exit_now: 417 if (CFG_TOYBOX_FREE) { 418 if (fd != 1) close(fd); 419 free(TT.lines); 420 } 421} 422