file.c revision 3e8b1581ff0f2daa934eb9d6362dfe4e2b4fa8c9
1/*	$NetBSD: file.c,v 1.7 2011/04/18 22:46:48 joerg Exp $	*/
2/*	$FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $	*/
3/*	$OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $	*/
4
5/*-
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
8 * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#if HAVE_NBTOOL_CONFIG_H
34#include "nbtool_config.h"
35#endif
36
37#include <sys/cdefs.h>
38__RCSID("$NetBSD: file.c,v 1.7 2011/04/18 22:46:48 joerg Exp $");
39
40#include <sys/param.h>
41#include <sys/types.h>
42#include <sys/stat.h>
43
44#ifndef ANDROID
45#include <bzlib.h>
46#endif
47#include <err.h>
48#include <errno.h>
49#include <fcntl.h>
50#include <stddef.h>
51#include <stdlib.h>
52#include <string.h>
53#include <unistd.h>
54#include <wchar.h>
55#include <wctype.h>
56#ifndef ANDROID
57#include <zlib.h>
58#endif
59
60#include "grep.h"
61
/* Size in bytes of the static read buffer; also the size of each refill read */
#define	MAXBUFSIZ	(32 * 1024)
/* Extra bytes requested beyond the current need when growing the line buffer */
#define	LNBUFBUMP	80

#ifndef ANDROID
/* Decompression handles, assigned in grep_file_init() per filebehave */
static gzFile gzbufdesc;
static BZFILE* bzbufdesc;
#endif

/*
 * Shared read buffer: bufpos points at the next unconsumed byte inside
 * buffer, and bufrem is the number of unconsumed bytes from bufpos on.
 */
static unsigned char buffer[MAXBUFSIZ];
static unsigned char *bufpos;
static size_t bufrem;

/*
 * Line buffer used by grep_fgetln() when a line is not fully contained in
 * the current contents of the read buffer; lnbuflen is its allocated size.
 */
static unsigned char *lnbuf;
static size_t lnbuflen;
76
77static inline int
78grep_refill(struct file *f)
79{
80	ssize_t nr;
81	int bzerr;
82
83	bufpos = buffer;
84	bufrem = 0;
85
86#ifndef ANDROID
87	if (filebehave == FILE_GZIP)
88		nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
89	else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
90		nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
91		switch (bzerr) {
92		case BZ_OK:
93		case BZ_STREAM_END:
94			/* No problem, nr will be okay */
95			break;
96		case BZ_DATA_ERROR_MAGIC:
97			/*
98			 * As opposed to gzread(), which simply returns the
99			 * plain file data, if it is not in the correct
100			 * compressed format, BZ2_bzRead() instead aborts.
101			 *
102			 * So, just restart at the beginning of the file again,
103			 * and use plain reads from now on.
104			 */
105			BZ2_bzReadClose(&bzerr, bzbufdesc);
106			bzbufdesc = NULL;
107			if (lseek(f->fd, 0, SEEK_SET) == -1)
108				return (-1);
109			nr = read(f->fd, buffer, MAXBUFSIZ);
110			break;
111		default:
112			/* Make sure we exit with an error */
113			nr = -1;
114		}
115	} else
116#endif
117		nr = read(f->fd, buffer, MAXBUFSIZ);
118
119	if (nr < 0)
120		return (-1);
121
122	bufrem = nr;
123	return (0);
124}
125
126static inline int
127grep_lnbufgrow(size_t newlen)
128{
129
130	if (lnbuflen < newlen) {
131		lnbuf = grep_realloc(lnbuf, newlen);
132		lnbuflen = newlen;
133	}
134
135	return (0);
136}
137
138char *
139grep_fgetln(struct file *f, size_t *lenp)
140{
141	unsigned char *p;
142	char *ret;
143	size_t len;
144	size_t off;
145	ptrdiff_t diff;
146
147	/* Fill the buffer, if necessary */
148	if (bufrem == 0 && grep_refill(f) != 0)
149		goto error;
150
151	if (bufrem == 0) {
152		/* Return zero length to indicate EOF */
153		*lenp = 0;
154		return ((char *)bufpos);
155	}
156
157	/* Look for a newline in the remaining part of the buffer */
158	if ((p = memchr(bufpos, line_sep, bufrem)) != NULL) {
159		++p; /* advance over newline */
160		ret = (char *)bufpos;
161		len = p - bufpos;
162		bufrem -= len;
163		bufpos = p;
164		*lenp = len;
165		return (ret);
166	}
167
168	/* We have to copy the current buffered data to the line buffer */
169	for (len = bufrem, off = 0; ; len += bufrem) {
170		/* Make sure there is room for more data */
171		if (grep_lnbufgrow(len + LNBUFBUMP))
172			goto error;
173		memcpy(lnbuf + off, bufpos, len - off);
174		off = len;
175		if (grep_refill(f) != 0)
176			goto error;
177		if (bufrem == 0)
178			/* EOF: return partial line */
179			break;
180		if ((p = memchr(bufpos, line_sep, bufrem)) == NULL)
181			continue;
182		/* got it: finish up the line (like code above) */
183		++p;
184		diff = p - bufpos;
185		len += diff;
186		if (grep_lnbufgrow(len))
187		    goto error;
188		memcpy(lnbuf + off, bufpos, diff);
189		bufrem -= diff;
190		bufpos = p;
191		break;
192	}
193	*lenp = len;
194	return ((char *)lnbuf);
195
196error:
197	*lenp = 0;
198	return (NULL);
199}
200
201static inline struct file *
202grep_file_init(struct file *f)
203{
204
205#ifndef ANDROID
206	if (filebehave == FILE_GZIP &&
207	    (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
208		goto error;
209
210	if (filebehave == FILE_BZIP &&
211	    (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
212		goto error;
213#endif
214
215	/* Fill read buffer, also catches errors early */
216	if (grep_refill(f) != 0)
217		goto error;
218
219	/* Check for binary stuff, if necessary */
220	if (!nulldataflag && binbehave != BINFILE_TEXT &&
221	    memchr(bufpos, '\0', bufrem) != NULL)
222		f->binary = true;
223
224	return (f);
225error:
226	close(f->fd);
227	free(f);
228	return (NULL);
229}
230
231/*
232 * Opens a file for processing.
233 */
234struct file *
235grep_open(const char *path)
236{
237	struct file *f;
238
239	f = grep_malloc(sizeof *f);
240	memset(f, 0, sizeof *f);
241	if (path == NULL) {
242		/* Processing stdin implies --line-buffered. */
243		lbflag = true;
244		f->fd = STDIN_FILENO;
245	} else if ((f->fd = open(path, O_RDONLY)) == -1) {
246		free(f);
247		return (NULL);
248	}
249
250	return (grep_file_init(f));
251}
252
253/*
254 * Closes a file.
255 */
256void
257grep_close(struct file *f)
258{
259
260	close(f->fd);
261
262	/* Reset read buffer and line buffer */
263	bufpos = buffer;
264	bufrem = 0;
265
266	free(lnbuf);
267	lnbuf = NULL;
268	lnbuflen = 0;
269}
270