1/*	$NetBSD: util.c,v 1.16 2012/05/06 22:32:05 joerg Exp $	*/
2/*	$FreeBSD: head/usr.bin/grep/util.c 211496 2010-08-19 09:28:59Z des $	*/
3/*	$OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $	*/
4
5/*-
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#if HAVE_NBTOOL_CONFIG_H
33#include "nbtool_config.h"
34#endif
35
36#include <sys/cdefs.h>
37__RCSID("$NetBSD: util.c,v 1.16 2012/05/06 22:32:05 joerg Exp $");
38
39#include <sys/stat.h>
40#include <sys/types.h>
41
42#include <ctype.h>
43#include <err.h>
44#include <errno.h>
45#include <fnmatch.h>
46#include <fts.h>
47#include <libgen.h>
48#include <stdbool.h>
49#include <stdio.h>
50#include <stdlib.h>
51#include <string.h>
52#include <unistd.h>
53#include <wchar.h>
54#include <wctype.h>
55
56#include "grep.h"
57
/*
 * Output state shared by procfile() and procline():
 *   first         - set to true by procfile() when it starts reading a
 *                   file, cleared by procline() once it prints a line;
 *   first_global  - starts out true and is cleared by procline() once it
 *                   prints a line;
 *   since_printed - number of lines procline() has handled without
 *                   printing since it last printed one.
 */
static bool	 first, first_global = true;
static unsigned long long since_printed;

/* Matches one line against the patterns; defined further down. */
static int	 procline(struct str *l, int);
62
63bool
64file_matching(const char *fname)
65{
66	char *fname_base, *fname_copy;
67	unsigned int i;
68	bool ret;
69
70	ret = finclude ? false : true;
71	fname_copy = grep_strdup(fname);
72	fname_base = basename(fname_copy);
73
74	for (i = 0; i < fpatterns; ++i) {
75		if (fnmatch(fpattern[i].pat, fname, 0) == 0 ||
76		    fnmatch(fpattern[i].pat, fname_base, 0) == 0) {
77			if (fpattern[i].mode == EXCL_PAT)
78				return (false);
79			else
80				ret = true;
81		}
82	}
83	free(fname_copy);
84	return (ret);
85}
86
87static inline bool
88dir_matching(const char *dname)
89{
90	unsigned int i;
91	bool ret;
92
93	ret = dinclude ? false : true;
94
95	for (i = 0; i < dpatterns; ++i) {
96		if (dname != NULL &&
97		    fnmatch(dname, dpattern[i].pat, 0) == 0) {
98			if (dpattern[i].mode == EXCL_PAT)
99				return (false);
100			else
101				ret = true;
102		}
103	}
104	return (ret);
105}
106
107/*
108 * Processes a directory when a recursive search is performed with
109 * the -R option.  Each appropriate file is passed to procfile().
110 */
111int
112grep_tree(char **argv)
113{
114	FTS *fts;
115	FTSENT *p;
116	char *d, *dir = NULL;
117	int c, fts_flags;
118	bool ok;
119
120	c = fts_flags = 0;
121
122	switch(linkbehave) {
123	case LINK_EXPLICIT:
124		fts_flags = FTS_COMFOLLOW;
125		break;
126	case LINK_SKIP:
127		fts_flags = FTS_PHYSICAL;
128		break;
129	default:
130		fts_flags = FTS_LOGICAL;
131
132	}
133
134	fts_flags |= FTS_NOSTAT | FTS_NOCHDIR;
135
136	if (!(fts = fts_open(argv, fts_flags, NULL)))
137		err(2, "fts_open");
138	while ((p = fts_read(fts)) != NULL) {
139		switch (p->fts_info) {
140		case FTS_DNR:
141			/* FALLTHROUGH */
142		case FTS_ERR:
143			errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno));
144			break;
145		case FTS_D:
146			/* FALLTHROUGH */
147		case FTS_DP:
148			break;
149		case FTS_DC:
150			/* Print a warning for recursive directory loop */
151			warnx("warning: %s: recursive directory loop",
152				p->fts_path);
153			break;
154		default:
155			/* Check for file exclusion/inclusion */
156			ok = true;
157			if (dexclude || dinclude) {
158				if ((d = strrchr(p->fts_path, '/')) != NULL) {
159					dir = grep_malloc(sizeof(char) *
160					    (d - p->fts_path + 1));
161					memcpy(dir, p->fts_path,
162					    d - p->fts_path);
163					dir[d - p->fts_path] = '\0';
164				}
165				ok = dir_matching(dir);
166				free(dir);
167				dir = NULL;
168			}
169			if (fexclude || finclude)
170				ok &= file_matching(p->fts_path);
171
172			if (ok)
173				c += procfile(p->fts_path);
174			break;
175		}
176	}
177
178	fts_close(fts);
179	return (c);
180}
181
182/*
183 * Opens a file and processes it.  Each file is processed line-by-line
184 * passing the lines to procline().
185 */
186int
187procfile(const char *fn)
188{
189	struct file *f;
190	struct stat sb;
191	struct str ln;
192	mode_t s;
193	int c, t;
194
195	if (mflag && (mcount <= 0))
196		return (0);
197
198	if (strcmp(fn, "-") == 0) {
199		fn = label != NULL ? label : getstr(1);
200		f = grep_open(NULL);
201	} else {
202		if (!stat(fn, &sb)) {
203			/* Check if we need to process the file */
204			s = sb.st_mode & S_IFMT;
205			if (s == S_IFDIR && dirbehave == DIR_SKIP)
206				return (0);
207			if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK
208				|| s == S_IFSOCK) && devbehave == DEV_SKIP)
209					return (0);
210		}
211		f = grep_open(fn);
212	}
213	if (f == NULL) {
214		if (!sflag)
215			warn("%s", fn);
216		if (errno == ENOENT)
217			notfound = true;
218		return (0);
219	}
220
221	ln.file = grep_malloc(strlen(fn) + 1);
222	strcpy(ln.file, fn);
223	ln.line_no = 0;
224	ln.len = 0;
225	tail = 0;
226	ln.off = -1;
227
228	for (first = true, c = 0;  c == 0 || !(lflag || qflag); ) {
229		ln.off += ln.len + 1;
230		if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0)
231			break;
232		if (ln.len > 0 && ln.dat[ln.len - 1] == line_sep)
233			--ln.len;
234		ln.line_no++;
235
236		/* Return if we need to skip a binary file */
237		if (f->binary && binbehave == BINFILE_SKIP) {
238			grep_close(f);
239			free(ln.file);
240			free(f);
241			return (0);
242		}
243		/* Process the file line-by-line */
244		t = procline(&ln, f->binary);
245		c += t;
246
247		/* Count the matches if we have a match limit */
248		if (mflag) {
249			mcount -= t;
250			if (mcount <= 0)
251				break;
252		}
253	}
254	if (Bflag > 0)
255		clearqueue();
256	grep_close(f);
257
258	if (cflag) {
259		if (!hflag)
260			printf("%s:", ln.file);
261		printf("%u%c", c, line_sep);
262	}
263	if (lflag && !qflag && c != 0)
264		printf("%s%c", fn, line_sep);
265	if (Lflag && !qflag && c == 0)
266		printf("%s%c", fn, line_sep);
267	if (c && !cflag && !lflag && !Lflag &&
268	    binbehave == BINFILE_BIN && f->binary && !qflag)
269		printf(getstr(8), fn);
270
271	free(ln.file);
272	free(f);
273	return (c);
274}
275
/*
 * Non-zero if the wide character x is alphanumeric according to
 * iswalnum() or is an underscore.  x may be evaluated twice, so pass an
 * expression without side effects.
 */
#define iswword(x)	(iswalnum((x)) || (x) == L'_')
277
/*
 * Processes a line comparing it with the specified patterns.  Each pattern
 * is looped to be compared along with the full string, saving each and every
 * match, which is necessary to colorize the output and to count the
 * matches.  The matching lines are passed to printline() to display the
 * appropriate output.
 *
 * l is the line to examine.  nottext is non-zero when the caller treats
 * the input as binary (procfile() passes f->binary).
 *
 * Returns 1 if the line is selected (a pattern matched, or none matched
 * and vflag is set) and 0 otherwise.
 */
static int
procline(struct str *l, int nottext)
{
	/* Recorded matches; at most MAX_LINE_MATCHES are kept per line. */
	regmatch_t matches[MAX_LINE_MATCHES];
	regmatch_t pmatch;
	size_t st = 0;		/* offset at which the next search starts */
	unsigned int i;
	int c = 0, m = 0, r = 0;	/* selected flag, match count, result */

	/* Loop to process the whole line */
	while (st <= l->len) {
		/*
		 * Describe the part of the line still to be searched.
		 * NOTE(review): presumably regexec() honours this range via
		 * REG_STARTEND in eflags -- confirm where eflags is set.
		 */
		pmatch.rm_so = st;
		pmatch.rm_eo = l->len;

		/* Loop to compare with all the patterns */
		for (i = 0; i < patterns; i++) {
/*
 * XXX: grep_search() is a workaround for speed up and should be
 * removed in the future.  See fastgrep.c.
 */
			if (fg_pattern[i].pattern) {
				r = grep_search(&fg_pattern[i],
				    (unsigned char *)l->dat,
				    l->len, &pmatch);
				/* Normalise the result to 0 or REG_NOMATCH. */
				r = (r == 0) ? 0 : REG_NOMATCH;
				st = pmatch.rm_eo;
			} else {
				r = regexec(&r_pattern[i], l->dat, 1,
				    &pmatch, eflags);
				/* Normalise the result to 0 or REG_NOMATCH. */
				r = (r == 0) ? 0 : REG_NOMATCH;
				st = pmatch.rm_eo;
			}
			if (r == REG_NOMATCH)
				continue;
			/* Check for full match */
			if (xflag &&
			    (pmatch.rm_so != 0 ||
			     (size_t)pmatch.rm_eo != l->len))
				continue;
			/*
			 * Check for whole word match.  The check is only
			 * made when the match does not start at offset 0.
			 */
			if (fg_pattern[i].word && pmatch.rm_so != 0) {
				wint_t wbegin, wend;

				/* Default to a non-word character. */
				wbegin = wend = L' ';
				/* Character just before the match. */
				if (pmatch.rm_so != 0 &&
				    sscanf(&l->dat[pmatch.rm_so - 1],
				    "%lc", &wbegin) != 1)
					continue;
				/* Character just after the match, if any. */
				if ((size_t)pmatch.rm_eo != l->len &&
				    sscanf(&l->dat[pmatch.rm_eo],
				    "%lc", &wend) != 1)
					continue;
				/* Reject if either neighbour is a word char. */
				if (iswword(wbegin) || iswword(wend))
					continue;
			}
			c = 1;
			/* Matches beyond the array capacity are not stored. */
			if (m < MAX_LINE_MATCHES)
				matches[m++] = pmatch;
			/* matches - skip further patterns */
			if ((color != NULL && !oflag) || qflag || lflag)
				break;
		}

		/* With vflag the selection is inverted after one pass. */
		if (vflag) {
			c = !c;
			break;
		}
		/* One pass if we are not recording matches */
		if ((color != NULL && !oflag) || qflag || lflag)
			break;

		if (st == (size_t)pmatch.rm_so)
			break; 	/* No matches */
	}

	/* Selected lines of binary input are counted but not printed here. */
	if (c && binbehave == BINFILE_BIN && nottext)
		return (c); /* Binary file */

	/* Dealing with the context */
	if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) {
		if (c) {
			/*
			 * Print a group separator when context is enabled,
			 * something was printed before, and this is either
			 * the first printed line of the file or more than
			 * Bflag lines went unprinted since the last one.
			 */
			if ((Aflag || Bflag) && !first_global &&
			    (first || since_printed > Bflag))
				printf("--\n");
			/* Owe Aflag lines of trailing context from here. */
			tail = Aflag;
			if (Bflag > 0)
				printqueue();
			printline(l, ':', matches, m);
		} else {
			/* Trailing context line after an earlier match. */
			printline(l, '-', matches, m);
			tail--;
		}
		first = false;
		first_global = false;
		since_printed = 0;
	} else {
		/* Not printed: keep it as possible leading context. */
		if (Bflag)
			enqueue(l);
		since_printed++;
	}
	return (c);
}
387
/*
 * malloc() wrapper for internal use: returns the allocated memory, or
 * terminates the program through err(2, ...) if malloc() returns NULL.
 */
void *
grep_malloc(size_t size)
{
	void *mem = malloc(size);

	if (!mem)
		err(2, "malloc");
	return (mem);
}
400
/*
 * calloc() wrapper for internal use: returns the zero-filled array, or
 * terminates the program through err(2, ...) if calloc() returns NULL.
 */
void *
grep_calloc(size_t nmemb, size_t size)
{
	void *mem = calloc(nmemb, size);

	if (!mem)
		err(2, "calloc");
	return (mem);
}
413
/*
 * realloc() wrapper for internal use: returns the resized allocation, or
 * terminates the program through err(2, ...) if realloc() returns NULL.
 */
void *
grep_realloc(void *ptr, size_t size)
{
	void *resized = realloc(ptr, size);

	if (!resized)
		err(2, "realloc");
	return (resized);
}
425
/*
 * strdup() wrapper for internal use: returns a newly allocated copy of
 * str, or terminates the program through err(2, ...) if strdup() returns
 * NULL.
 */
char *
grep_strdup(const char *str)
{
	char *copy = strdup(str);

	if (!copy)
		err(2, "strdup");
	return (copy);
}
438
439/*
440 * Prints a matching line according to the command line options.
441 */
442void
443printline(struct str *line, int sep, regmatch_t *matches, int m)
444{
445	size_t a = 0;
446	int i, n = 0;
447
448	if (!hflag) {
449		if (nullflag == 0)
450			fputs(line->file, stdout);
451		else {
452			printf("%s", line->file);
453			putchar(0);
454		}
455		++n;
456	}
457	if (nflag) {
458		if (n > 0)
459			putchar(sep);
460		printf("%d", line->line_no);
461		++n;
462	}
463	if (bflag) {
464		if (n > 0)
465			putchar(sep);
466		printf("%lld", (long long)line->off);
467		++n;
468	}
469	if (n)
470		putchar(sep);
471	/* --color and -o */
472	if ((oflag || color) && m > 0) {
473		for (i = 0; i < m; i++) {
474			if (!oflag)
475				fwrite(line->dat + a, matches[i].rm_so - a, 1,
476				    stdout);
477			if (color)
478				fprintf(stdout, "\33[%sm\33[K", color);
479
480				fwrite(line->dat + matches[i].rm_so,
481				    matches[i].rm_eo - matches[i].rm_so, 1,
482				    stdout);
483			if (color)
484				fprintf(stdout, "\33[m\33[K");
485			a = matches[i].rm_eo;
486			if (oflag)
487				putchar('\n');
488		}
489		if (!oflag) {
490			if (line->len - a > 0)
491				fwrite(line->dat + a, line->len - a, 1, stdout);
492			putchar(line_sep);
493		}
494	} else {
495		fwrite(line->dat, line->len, 1, stdout);
496		putchar(line_sep);
497	}
498}
499