1/*	$NetBSD: file.c,v 1.7 2011/04/18 22:46:48 joerg Exp $	*/
2/*	$FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $	*/
3/*	$OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $	*/
4
5/*-
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
8 * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#if HAVE_NBTOOL_CONFIG_H
34#include "nbtool_config.h"
35#endif
36
37#include <sys/cdefs.h>
38__RCSID("$NetBSD: file.c,v 1.7 2011/04/18 22:46:48 joerg Exp $");
39
40#include <sys/param.h>
41#include <sys/types.h>
42#include <sys/stat.h>
43
44#ifndef __ANDROID__
45#include <bzlib.h>
46#endif
47#include <err.h>
48#include <errno.h>
49#include <fcntl.h>
50#include <stddef.h>
51#include <stdlib.h>
52#include <string.h>
53#include <unistd.h>
54#include <wchar.h>
55#include <wctype.h>
56#ifndef __ANDROID__
57#include <zlib.h>
58#endif
59
60#include "grep.h"
61
/* Size in bytes of the static read buffer filled by grep_refill(). */
#define	MAXBUFSIZ	(32 * 1024)
/* Extra slack requested whenever grep_fgetln() grows the line buffer. */
#define	LNBUFBUMP	80

#ifndef __ANDROID__
/* Decompression handles set up by grep_file_init() for the current file. */
static gzFile gzbufdesc;
static BZFILE* bzbufdesc;
#endif

/*
 * Read buffer state: the raw storage, the position of the next unread
 * byte, and the number of unread bytes remaining.
 */
static unsigned char buffer[MAXBUFSIZ];
static unsigned char *bufpos;
static size_t bufrem;

/*
 * Heap-allocated buffer (and its capacity) used by grep_fgetln() to
 * assemble lines that do not fit in the unread part of the read buffer.
 */
static unsigned char *lnbuf;
static size_t lnbuflen;
76
77static inline int
78grep_refill(struct file *f)
79{
80	ssize_t nr;
81#ifndef __ANDROID__
82	int bzerr;
83#endif
84
85	bufpos = buffer;
86	bufrem = 0;
87
88#ifndef __ANDROID__
89	if (filebehave == FILE_GZIP)
90		nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
91	else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
92		nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
93		switch (bzerr) {
94		case BZ_OK:
95		case BZ_STREAM_END:
96			/* No problem, nr will be okay */
97			break;
98		case BZ_DATA_ERROR_MAGIC:
99			/*
100			 * As opposed to gzread(), which simply returns the
101			 * plain file data, if it is not in the correct
102			 * compressed format, BZ2_bzRead() instead aborts.
103			 *
104			 * So, just restart at the beginning of the file again,
105			 * and use plain reads from now on.
106			 */
107			BZ2_bzReadClose(&bzerr, bzbufdesc);
108			bzbufdesc = NULL;
109			if (lseek(f->fd, 0, SEEK_SET) == -1)
110				return (-1);
111			nr = read(f->fd, buffer, MAXBUFSIZ);
112			break;
113		default:
114			/* Make sure we exit with an error */
115			nr = -1;
116		}
117	} else
118#endif
119		nr = read(f->fd, buffer, MAXBUFSIZ);
120
121	if (nr < 0)
122		return (-1);
123
124	bufrem = nr;
125	return (0);
126}
127
128static inline int
129grep_lnbufgrow(size_t newlen)
130{
131
132	if (lnbuflen < newlen) {
133		lnbuf = grep_realloc(lnbuf, newlen);
134		lnbuflen = newlen;
135	}
136
137	return (0);
138}
139
140char *
141grep_fgetln(struct file *f, size_t *lenp)
142{
143	unsigned char *p;
144	char *ret;
145	size_t len;
146	size_t off;
147	ptrdiff_t diff;
148
149	/* Fill the buffer, if necessary */
150	if (bufrem == 0 && grep_refill(f) != 0)
151		goto error;
152
153	if (bufrem == 0) {
154		/* Return zero length to indicate EOF */
155		*lenp = 0;
156		return ((char *)bufpos);
157	}
158
159	/* Look for a newline in the remaining part of the buffer */
160	if ((p = memchr(bufpos, line_sep, bufrem)) != NULL) {
161		++p; /* advance over newline */
162		ret = (char *)bufpos;
163		len = p - bufpos;
164		bufrem -= len;
165		bufpos = p;
166		*lenp = len;
167		return (ret);
168	}
169
170	/* We have to copy the current buffered data to the line buffer */
171	for (len = bufrem, off = 0; ; len += bufrem) {
172		/* Make sure there is room for more data */
173		if (grep_lnbufgrow(len + LNBUFBUMP))
174			goto error;
175		memcpy(lnbuf + off, bufpos, len - off);
176		off = len;
177		if (grep_refill(f) != 0)
178			goto error;
179		if (bufrem == 0)
180			/* EOF: return partial line */
181			break;
182		if ((p = memchr(bufpos, line_sep, bufrem)) == NULL)
183			continue;
184		/* got it: finish up the line (like code above) */
185		++p;
186		diff = p - bufpos;
187		len += diff;
188		if (grep_lnbufgrow(len))
189		    goto error;
190		memcpy(lnbuf + off, bufpos, diff);
191		bufrem -= diff;
192		bufpos = p;
193		break;
194	}
195	*lenp = len;
196	return ((char *)lnbuf);
197
198error:
199	*lenp = 0;
200	return (NULL);
201}
202
203static inline struct file *
204grep_file_init(struct file *f)
205{
206
207#ifndef __ANDROID__
208	if (filebehave == FILE_GZIP &&
209	    (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
210		goto error;
211
212	if (filebehave == FILE_BZIP &&
213	    (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
214		goto error;
215#endif
216
217	/* Fill read buffer, also catches errors early */
218	if (grep_refill(f) != 0)
219		goto error;
220
221	/* Check for binary stuff, if necessary */
222	if (!nulldataflag && binbehave != BINFILE_TEXT &&
223	    memchr(bufpos, '\0', bufrem) != NULL)
224		f->binary = true;
225
226	return (f);
227error:
228	close(f->fd);
229	free(f);
230	return (NULL);
231}
232
233/*
234 * Opens a file for processing.
235 */
236struct file *
237grep_open(const char *path)
238{
239	struct file *f;
240
241	f = grep_malloc(sizeof *f);
242	memset(f, 0, sizeof *f);
243	if (path == NULL) {
244		/* Processing stdin implies --line-buffered. */
245		lbflag = true;
246		f->fd = STDIN_FILENO;
247	} else if ((f->fd = open(path, O_RDONLY)) == -1) {
248		free(f);
249		return (NULL);
250	}
251
252	return (grep_file_init(f));
253}
254
255/*
256 * Closes a file.
257 */
258void
259grep_close(struct file *f)
260{
261
262	close(f->fd);
263
264	/* Reset read buffer and line buffer */
265	bufpos = buffer;
266	bufrem = 0;
267
268	free(lnbuf);
269	lnbuf = NULL;
270	lnbuflen = 0;
271}
272