1/* uniq.c - report or filter out repeated lines in a file
2 *
3 * Copyright 2012 Georgi Chorbadzhiyski <georgi@unixsol.org>
4 *
5 * See http://opengroup.org/onlinepubs/9699919799/utilities/uniq.html
6
7USE_UNIQ(NEWTOY(uniq, "f#s#w#zicdu", TOYFLAG_USR|TOYFLAG_BIN))
8
9config UNIQ
10  bool "uniq"
11  default y
12  help
13    usage: uniq [-cduiz] [-w maxchars] [-f fields] [-s char] [input_file [output_file]]
14
15    Report or filter out repeated lines in a file
16
17    -c	show counts before each line
18    -d	show only lines that are repeated
19    -u	show only lines that are unique
20    -i	ignore case when comparing lines
21    -z	lines end with \0 not \n
22    -w	compare maximum X chars per line
23    -f	ignore first X fields
24    -s	ignore first X chars
25*/
26
27#define FOR_uniq
28#include "toys.h"
29
// Per-command state, reached through TT in the functions below.
// NOTE(review): maxchars/nchars/nfields are presumably filled in from
// -w/-s/-f by the option parser according to this declaration order --
// confirm before reordering or inserting members.
GLOBALS(
  // Cap on characters compared per line; 0 means compare the whole line
  long maxchars;
  // Leading characters skip() steps over (after any fields) before comparing
  long nchars;
  // Leading whitespace-separated fields skip() steps over before comparing
  long nfields;
  // Number of subsequent lines that matched the line currently being held
  long repeats;
)
36
37static char *skip(char *str)
38{
39  long nchars = TT.nchars, nfields;
40
41  // Skip fields first
42  for (nfields = TT.nfields; nfields; str++) {
43    while (*str && isspace(*str)) str++;
44    while (*str && !isspace(*str)) str++;
45    nfields--;
46  }
47  // Skip chars
48  while (*str && nchars--) str++;
49
50  return str;
51}
52
53static void print_line(FILE *f, char *line)
54{
55  if (toys.optflags & (TT.repeats ? FLAG_u : FLAG_d)) return;
56  if (toys.optflags & FLAG_c) fprintf(f, "%7lu ", TT.repeats + 1);
57  fputs(line, f);
58  if (toys.optflags & FLAG_z) fputc(0, f);
59}
60
61void uniq_main(void)
62{
63  FILE *infile = stdin, *outfile = stdout;
64  char *thisline = NULL, *prevline = NULL, *tmpline, eol = '\n';
65  size_t thissize, prevsize = 0, tmpsize;
66
67  if (toys.optc >= 1) infile = xfopen(toys.optargs[0], "r");
68  if (toys.optc >= 2) outfile = xfopen(toys.optargs[1], "w");
69
70  if (toys.optflags & FLAG_z) eol = 0;
71
72  // If first line can't be read
73  if (getdelim(&prevline, &prevsize, eol, infile) < 0)
74    return;
75
76  while (getdelim(&thisline, &thissize, eol, infile) > 0) {
77    int diff;
78    char *t1, *t2;
79
80    // If requested get the chosen fields + character offsets.
81    if (TT.nfields || TT.nchars) {
82      t1 = skip(thisline);
83      t2 = skip(prevline);
84    } else {
85      t1 = thisline;
86      t2 = prevline;
87    }
88
89    if (TT.maxchars == 0) {
90      diff = !(toys.optflags & FLAG_i) ? strcmp(t1, t2) : strcasecmp(t1, t2);
91    } else {
92      diff = !(toys.optflags & FLAG_i) ? strncmp(t1, t2, TT.maxchars)
93              : strncasecmp(t1, t2, TT.maxchars);
94    }
95
96    if (diff == 0) { // same
97      TT.repeats++;
98    } else {
99      print_line(outfile, prevline);
100
101      TT.repeats = 0;
102
103      tmpline = prevline;
104      prevline = thisline;
105      thisline = tmpline;
106
107      tmpsize = prevsize;
108      prevsize = thissize;
109      thissize = tmpsize;
110    }
111  }
112
113  print_line(outfile, prevline);
114
115  if (CFG_TOYBOX_FREE) {
116    if (outfile != stdout) fclose(outfile);
117    if (infile != stdin) fclose(infile);
118    free(prevline);
119    free(thisline);
120  }
121}
122