/* grep.c - print lines that match a given regular expression
2 *
3 * Copyright 2013 CE Strake <strake888 at gmail.com>
4 *
5 * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/grep.html
6 *
7 * TODO: -ABC
8
9USE_GREP(NEWTOY(grep, "C#B#A#ZzEFHabhinorsvwclqe*f*m#x[!wx][!EFw]", TOYFLAG_BIN))
10USE_EGREP(OLDTOY(egrep, grep, TOYFLAG_BIN))
11USE_FGREP(OLDTOY(fgrep, grep, TOYFLAG_BIN))
12
13config GREP
14  bool "grep"
15  default y
16  help
17    usage: grep [-EFivwcloqsHbhn] [-A NUM] [-m MAX] [-e REGEX]... [-f REGFILE] [FILE]...
18
19    Show lines matching regular expressions. If no -e, first argument is
20    regular expression to match. With no files (or "-" filename) read stdin.
21    Returns 0 if matched, 1 if no match found.
22
23    -e  Regex to match. (May be repeated.)
24    -f  File containing regular expressions to match.
25
26    match type:
27    -A  Show NUM lines after     -B  Show NUM lines before match
28    -C  NUM lines context (A+B)  -E  extended regex syntax
29    -F  fixed (literal match)    -i  case insensitive
30    -m  match MAX many lines     -r  recursive (on dir)
31    -v  invert match             -w  whole word (implies -E)
32    -x  whole line               -z  input NUL terminated
33
34    display modes: (default: matched line)
35    -c  count of matching lines  -l  show matching filenames
36    -o  only matching part       -q  quiet (errors only)
37    -s  silent (no error msg)    -Z  output NUL terminated
38
39    output prefix (default: filename if checking more than 1 file)
40    -H  force filename           -b  byte offset of match
41    -h  hide filename            -n  line number of match
42
43config EGREP
44  bool
45  default y
46  depends on GREP
47
48config FGREP
49  bool
50  default y
51  depends on GREP
52*/
53
54#define FOR_grep
55#include "toys.h"
56#include <regex.h>
57
58GLOBALS(
59  long m;
60  struct arg_list *f;
61  struct arg_list *e;
62  long a;
63  long b;
64  long c;
65
66  char indelim, outdelim;
67)
68
69// Emit line with various potential prefixes and delimiter
70static void outline(char *line, char dash, char *name, long lcount, long bcount,
71  int trim)
72{
73  if (name && (toys.optflags&FLAG_H)) printf("%s%c", name, dash);
74  if (!line || (lcount && (toys.optflags&FLAG_n)))
75    printf("%ld%c", lcount, line ? dash : TT.outdelim);
76  if (bcount && (toys.optflags&FLAG_b)) printf("%ld%c", bcount-1, dash);
77  if (line) xprintf("%.*s%c", trim ? trim : INT_MAX/2, line, TT.outdelim);
78}
79
80// Show matches in one file
81static void do_grep(int fd, char *name)
82{
83  struct double_list *dlb = 0;
84  FILE *file = fdopen(fd, "r");
85  long lcount = 0, mcount = 0, offset = 0, after = 0, before = 0;
86  char *bars = 0;
87
88  if (!fd) name = "(standard input)";
89
90  if (!file) {
91    perror_msg_raw(name);
92    return;
93  }
94
95  // Loop through lines of input
96  for (;;) {
97    char *line = 0, *start;
98    regmatch_t matches;
99    size_t unused;
100    long len;
101    int mmatch = 0;
102
103    lcount++;
104    if (0 > (len = getdelim(&line, &unused, TT.indelim, file))) break;
105    if (line[len-1] == TT.indelim) line[len-1] = 0;
106
107    start = line;
108
109    // Loop through matches in this line
110    do {
111      int rc = 0, skip = 0;
112
113      // Handle non-regex matches
114      if (toys.optflags & FLAG_F) {
115        struct arg_list *seek, fseek;
116        char *s = 0;
117
118        for (seek = TT.e; seek; seek = seek->next) {
119          if (toys.optflags & FLAG_x) {
120            int i = (toys.optflags & FLAG_i);
121
122            if ((i ? strcasecmp : strcmp)(seek->arg, line)) s = line;
123          } else if (!*seek->arg) {
124            seek = &fseek;
125            fseek.arg = s = line;
126            break;
127          }
128          if (toys.optflags & FLAG_i) s = strnstr(line, seek->arg);
129          else s = strstr(line, seek->arg);
130          if (s) break;
131        }
132
133        if (s) {
134          matches.rm_so = (s-line);
135          skip = matches.rm_eo = (s-line)+strlen(seek->arg);
136        } else rc = 1;
137      } else {
138        rc = regexec((regex_t *)toybuf, start, 1, &matches,
139                     start==line ? 0 : REG_NOTBOL);
140        skip = matches.rm_eo;
141      }
142
143      if (toys.optflags & FLAG_x)
144        if (matches.rm_so || line[matches.rm_eo]) rc = 1;
145
146      if (!rc && (toys.optflags & FLAG_w)) {
147        char c = 0;
148
149        if ((start+matches.rm_so)!=line) {
150          c = start[matches.rm_so-1];
151          if (!isalnum(c) && c != '_') c = 0;
152        }
153        if (!c) {
154          c = start[matches.rm_eo];
155          if (!isalnum(c) && c != '_') c = 0;
156        }
157        if (c) {
158          start += matches.rm_so+1;
159
160          continue;
161        }
162      }
163
164      if (toys.optflags & FLAG_v) {
165        if (toys.optflags & FLAG_o) {
166          if (rc) skip = matches.rm_eo = strlen(start);
167          else if (!matches.rm_so) {
168            start += skip;
169            continue;
170          } else matches.rm_eo = matches.rm_so;
171        } else {
172          if (!rc) break;
173          matches.rm_eo = strlen(start);
174        }
175        matches.rm_so = 0;
176      } else if (rc) break;
177
178      // At least one line we didn't print since match while -ABC active
179      if (bars) {
180        xputs(bars);
181        bars = 0;
182      }
183      mmatch++;
184      toys.exitval = 0;
185      if (toys.optflags & FLAG_q) xexit();
186      if (toys.optflags & FLAG_l) {
187        xprintf("%s%c", name, TT.outdelim);
188        free(line);
189        fclose(file);
190        return;
191      }
192      if (toys.optflags & FLAG_o)
193        if (matches.rm_eo == matches.rm_so)
194          break;
195
196      if (!(toys.optflags & FLAG_c)) {
197        long bcount = 1 + offset + (start-line) +
198          ((toys.optflags & FLAG_o) ? matches.rm_so : 0);
199
200        if (!(toys.optflags & FLAG_o)) {
201          while (dlb) {
202            struct double_list *dl = dlist_pop(&dlb);
203
204            outline(dl->data, '-', name, lcount-before, 0, 0);
205            free(dl->data);
206            free(dl);
207            before--;
208          }
209
210          outline(line, ':', name, lcount, bcount, 0);
211          if (TT.a) after = TT.a+1;
212        } else outline(start+matches.rm_so, ':', name, lcount, bcount,
213                       matches.rm_eo-matches.rm_so);
214      }
215
216      start += skip;
217      if (!(toys.optflags & FLAG_o)) break;
218    } while (*start);
219    offset += len;
220
221    if (mmatch) mcount++;
222    else {
223      int discard = (after || TT.b);
224
225      if (after && --after) {
226        outline(line, '-', name, lcount, 0, 0);
227        discard = 0;
228      }
229      if (discard && TT.b) {
230        dlist_add(&dlb, line);
231        line = 0;
232        if (++before>TT.b) {
233          struct double_list *dl;
234
235          dl = dlist_pop(&dlb);
236          free(dl->data);
237          free(dl);
238          before--;
239        } else discard = 0;
240      }
241      // If we discarded a line while displaying context, show bars before next
242      // line (but don't show them now in case that was last match in file)
243      if (discard && mcount) bars = "--";
244    }
245    free(line);
246
247    if ((toys.optflags & FLAG_m) && mcount >= TT.m) break;
248  }
249
250  if (toys.optflags & FLAG_c) outline(0, ':', name, mcount, 0, 0);
251
252  // loopfiles will also close the fd, but this frees an (opaque) struct.
253  fclose(file);
254}
255
256static void parse_regex(void)
257{
258  struct arg_list *al, *new, *list = NULL;
259  long len = 0;
260  char *s, *ss;
261
262  // Add all -f lines to -e list. (Yes, this is leaking allocation context for
263  // exit to free. Not supporting nofork for this command any time soon.)
264  al = TT.f ? TT.f : TT.e;
265  while (al) {
266    if (TT.f) s = ss = xreadfile(al->arg, 0, 0);
267    else s = ss = al->arg;
268
269    // Split lines at \n, add individual lines to new list.
270    do {
271      ss = strchr(s, '\n');
272      if (ss) *(ss++) = 0;
273      new = xmalloc(sizeof(struct arg_list));
274      new->next = list;
275      new->arg = s;
276      list = new;
277      s = ss;
278    } while (ss && *s);
279
280    // Advance, when we run out of -f switch to -e.
281    al = al->next;
282    if (!al && TT.f) {
283      TT.f = 0;
284      al = TT.e;
285    }
286  }
287  TT.e = list;
288
289  if (!(toys.optflags & FLAG_F)) {
290    char *regstr;
291    int i;
292
293    // Convert strings to one big regex
294    for (al = TT.e; al; al = al->next)
295      len += strlen(al->arg)+1+!(toys.optflags & FLAG_E);
296
297    regstr = s = xmalloc(len);
298    for (al = TT.e; al; al = al->next) {
299      s = stpcpy(s, al->arg);
300      if (!(toys.optflags & FLAG_E)) *(s++) = '\\';
301      *(s++) = '|';
302    }
303    *(s-=(1+!(toys.optflags & FLAG_E))) = 0;
304
305    i = regcomp((regex_t *)toybuf, regstr,
306                ((toys.optflags & FLAG_E) ? REG_EXTENDED : 0) |
307                ((toys.optflags & FLAG_i) ? REG_ICASE    : 0));
308
309    if (i) {
310      regerror(i, (regex_t *)toybuf, toybuf+sizeof(regex_t),
311               sizeof(toybuf)-sizeof(regex_t));
312      error_exit("bad REGEX: %s", toybuf);
313    }
314  }
315}
316
317static int do_grep_r(struct dirtree *new)
318{
319  char *name;
320
321  if (new->parent && !dirtree_notdotdot(new)) return 0;
322  if (S_ISDIR(new->st.st_mode)) return DIRTREE_RECURSE;
323
324  // "grep -r onefile" doesn't show filenames, but "grep -r onedir" should.
325  if (new->parent && !(toys.optflags & FLAG_h)) toys.optflags |= FLAG_H;
326
327  name = dirtree_path(new, 0);
328  do_grep(openat(dirtree_parentfd(new), new->name, 0), name);
329  free(name);
330
331  return 0;
332}
333
334void grep_main(void)
335{
336  char **ss = toys.optargs;
337
338  if (!TT.a) TT.a = TT.c;
339  if (!TT.b) TT.b = TT.c;
340
341  TT.indelim = '\n' * !(toys.optflags&FLAG_z);
342  TT.outdelim = '\n' * !(toys.optflags&FLAG_Z);
343
344  // Handle egrep and fgrep
345  if (*toys.which->name == 'e') toys.optflags |= FLAG_E;
346  if (*toys.which->name == 'f') toys.optflags |= FLAG_F;
347
348  if (!TT.e && !TT.f) {
349    if (!*ss) error_exit("no REGEX");
350    TT.e = xzalloc(sizeof(struct arg_list));
351    TT.e->arg = *(ss++);
352    toys.optc--;
353  }
354
355  parse_regex();
356
357  if (!(toys.optflags & FLAG_h) && toys.optc>1) toys.optflags |= FLAG_H;
358
359  toys.exitval = 1;
360  if (toys.optflags & FLAG_s) {
361    close(2);
362    xopen("/dev/null", O_RDWR);
363  }
364
365  if (toys.optflags & FLAG_r) {
366    // Iterate through -r arguments. Use "." as default if none provided.
367    for (ss = *ss ? ss : (char *[]){".", 0}; *ss; ss++) {
368      if (!strcmp(*ss, "-")) do_grep(0, *ss);
369      else dirtree_read(*ss, do_grep_r);
370    }
371  } else loopfiles_rw(ss, O_RDONLY, 0, 1, do_grep);
372}
373