glob.c revision b6c5c8c51f954cfbe76424fd57c33a87166f0545
1/*
2 * This file is part of ltrace.
3 * Copyright (C) 2007, 2008, 2012 Petr Machata, Red Hat Inc.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation; either version 2 of the
8 * License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18 * 02110-1301 USA
19 */
20
21#include <sys/types.h>
22#include <regex.h>
23#include <string.h>
24#include <stdlib.h>
25#include <assert.h>
26
/* Locate the end of a character class such as "[:space:]".
 *
 * GLOB is the pattern, LENGTH the number of characters in it, and
 * FROM the index of the '[' that opens the "[:" sequence.
 *
 * The class name has to be at least one character long, so the
 * search for the closing ':' starts at FROM + 3.  The first ':'
 * found must be immediately followed by ']'.
 *
 * Returns the index of the closing ']', or -1 if the class is not
 * properly terminated inside the first LENGTH characters.  No
 * character at or past LENGTH is ever read, and LENGTH == 0 is
 * handled (it simply yields -1).  */
static ssize_t
match_character_class(const char *glob, size_t length, size_t from)
{
	size_t i;

	/* I + 1 < LENGTH guarantees that the ']' we are about to look
	 * at is still inside the pattern.  */
	for (i = from + 3; i + 1 < length; ++i) {
		if (glob[i] == ':')
			return glob[i + 1] == ']' ? (ssize_t)(i + 1) : -1;
	}
	return -1;
}
38
/* Find the ']' that closes the bracket expression opened by the '['
 * at index FROM of GLOB (LENGTH characters long).
 *
 * *EXCLMP is set to 1 when the expression is complemented with '!'
 * (the glob spelling, which the caller has to rewrite), and to 0
 * when it is complemented with '^' or not complemented at all.  It
 * is left untouched if the pattern ends right after the '['.
 *
 * Backslash gets no special treatment here: the first ']' that is
 * neither the leading character of the expression nor part of a
 * "[:class:]" terminates it.
 *
 * Returns the index of the closing ']', or -1 if the expression is
 * not terminated.  */
static ssize_t
match_brack(const char *glob, size_t length, size_t from, int *exclmp)
{
	size_t i = from + 1;

	if (i >= length)
		return -1;

	/* Complement operator.  */
	*exclmp = 0;
	if (glob[i] == '^' || glob[i] == '!') {
		*exclmp = glob[i++] == '!';
		if (i >= length)
			return -1;
	}

	/* On the first character, both [ and ] are legal.  But when [
	 * is followed by :, it is a character class.
	 *
	 * NOTE(review): glob[i + 1] may be index LENGTH; this relies
	 * on GLOB being NUL-terminated there, as it is when LENGTH
	 * comes from strlen.  */
	if (glob[i] == '[' && glob[i + 1] == ':') {
		ssize_t j = match_character_class(glob, length, i);
		if (j < 0)
			return -1;
		i = j;
	}
	++i; /* skip any character, including [ or ]  */

	for (; i < length; ++i) {
		if (glob[i] == ']')
			return i;

		if (glob[i] == '[' && glob[i + 1] == ':') {
			ssize_t j = match_character_class(glob, length, i);
			if (j < 0)
				return -1;
			/* Resume after the class's own closing ].  */
			i = j;
		}
	}
	return -1;
}
85
/* Append STR_SIZE bytes of STR to the growable buffer *BUFP.
 *
 * *SIZEP is the number of bytes currently used and *ALLOCP the
 * number of bytes allocated; both are updated as needed.  When
 * STR_SIZE is 0, STR is taken to be a NUL-terminated string and its
 * strlen is used instead (the terminator itself is not copied).
 *
 * Returns 0 on success.  Returns -1 if the buffer cannot be grown;
 * in that case *BUFP, *SIZEP and *ALLOCP are left unchanged and the
 * caller still owns *BUFP.  */
static int
append(char **bufp, const char *str, size_t str_size,
       size_t *sizep, size_t *allocp)
{
	if (str_size == 0)
		str_size = strlen(str);

	/* Nothing to copy.  Returning early also keeps us from
	 * handing a still-NULL buffer to memcpy.  */
	if (str_size == 0)
		return 0;

	size_t nsize = *sizep + str_size;
	if (nsize < *sizep)
		return -1; /* size_t wrapped around */

	if (nsize > *allocp) {
		/* Double the request to amortize reallocations, unless
		 * doubling itself would overflow.  */
		size_t nalloc = nsize <= (size_t)-1 / 2 ? nsize * 2 : nsize;
		char *nbuf = realloc(*bufp, nalloc);
		if (nbuf == NULL)
			return -1;
		*allocp = nalloc;
		*bufp = nbuf;
	}

	memcpy(*bufp + *sizep, str, str_size);
	*sizep = nsize;
	return 0;
}
106
/* Translate the glob pattern GLOB into a regular expression.
 *
 * On success, returns 0 and stores a freshly allocated,
 * NUL-terminated string in *RETP; the caller must free it.
 *
 * On failure, *RETP is left untouched and one of the following is
 * returned: REG_ESPACE when the output buffer cannot be grown,
 * REG_EBRACK for an unterminated bracket expression, REG_EESCAPE
 * for a pattern that ends in a lone backslash.
 *
 * Translation rules: unescaped '*' becomes ".*", '?' becomes ".",
 * '.' becomes "\.", and a bracket expression is copied through
 * with a leading '!' rewritten to '^'.  After a backslash, '\\' and
 * '*' stay backslash-quoted, '?' is emitted bare, and any other
 * character is emitted preceded by a backslash.  Everything else is
 * copied verbatim; no anchors are added.  */
static int
glob_to_regex(const char *glob, char **retp)
{
	char *out = NULL;
	size_t used = 0;
	size_t capacity = 0;
	/* Any failed append means we ran out of memory, so that is
	 * the status reported unless something else overrides it.  */
	int status = REG_ESPACE;
	int quoted = 0;
	const size_t glob_len = strlen(glob);

	for (size_t pos = 0; pos < glob_len; ++pos) {
		const char ch = glob[pos];
		int rc;

		if (quoted) {
			/* Character following a backslash.  */
			quoted = 0;
			switch (ch) {
			case '\\':
				rc = append(&out, "\\\\", 0,
					    &used, &capacity);
				break;
			case '*':
				rc = append(&out, "\\*", 0,
					    &used, &capacity);
				break;
			case '?':
				rc = append(&out, "?", 0,
					    &used, &capacity);
				break;
			default: {
				const char pair[2] = { '\\', ch };
				rc = append(&out, pair, 2,
					    &used, &capacity);
				break;
			}
			}
			if (rc < 0)
				goto error;
			continue;
		}

		switch (ch) {
		case '\\':
			quoted = 1;
			continue;

		case '[': {
			int bang = 0;
			ssize_t end = match_brack(glob, glob_len, pos, &bang);
			if (end < 0) {
				status = REG_EBRACK;
				goto error;
			}
			/* "[!" is the glob spelling of "[^"; emit the
			 * regex form and skip both source characters.  */
			if (bang
			    && append(&out, "[^", 2, &used, &capacity) < 0)
				goto error;
			rc = append(&out, glob + pos + 2 * bang,
				    end - pos + 1 - 2 * bang,
				    &used, &capacity);
			/* Continue after the closing bracket.  */
			pos = end;
			break;
		}

		case '*':
			rc = append(&out, ".*", 0, &used, &capacity);
			break;

		case '?':
			rc = append(&out, ".", 0, &used, &capacity);
			break;

		case '.':
			rc = append(&out, "\\.", 0, &used, &capacity);
			break;

		default:
			rc = append(&out, &ch, 1, &used, &capacity);
			break;
		}

		if (rc < 0)
			goto error;
	}

	/* A trailing backslash has nothing to escape.  */
	if (quoted) {
		status = REG_EESCAPE;
		goto error;
	}

	/* Terminate the string: explicit size 1 copies the NUL byte
	 * of the empty literal.  */
	if (append(&out, "", 1, &used, &capacity) < 0)
		goto error;

	*retp = out;
	return 0;

error:
	free(out);
	return status;
}
185
/* Compile the glob pattern GLOB into PREG.
 *
 * GLOB is first translated to a regular expression, which is then
 * handed to regcomp together with CFLAGS.  Returns 0 on success;
 * otherwise returns the status of whichever step failed (the
 * translation or regcomp).  PREG is only touched by regcomp, i.e.
 * not at all when the translation fails.  */
int
globcomp(regex_t *preg, const char *glob, int cflags)
{
	char *regex = NULL;
	int rc = glob_to_regex(glob, &regex);

	if (rc == 0) {
		assert(regex != NULL);
		rc = regcomp(preg, regex, cflags);
		/* regcomp is done with the text form of the pattern.  */
		free(regex);
	}
	return rc;
}
198
199#ifdef TEST
200#include <stdio.h>
201
/* Test helper: translate GLOB and compare the outcome with what is
 * expected.
 *
 * EXP_STATUS is the status glob_to_regex should return.  When it is
 * 0, EXPECT is the regular expression that should be produced;
 * otherwise EXPECT is not looked at.  Mismatches are reported on
 * stderr; they do not abort the test run.  */
static void
translate(const char *glob, int exp_status, const char *expect)
{
	char *pattern = NULL;
	int status = glob_to_regex(glob, &pattern);
	if (status != exp_status) {
		fprintf(stderr, "translating %s, expected status %d, got %d\n",
			glob, exp_status, status);
		/* An unexpected success still hands us a buffer that
		 * has to be released.  After a failed translation
		 * PATTERN is still NULL and free is a no-op.  */
		free(pattern);
		return;
	}

	if (status == 0) {
		assert(pattern != NULL);
		if (strcmp(pattern, expect) != 0)
			fprintf(stderr, "translating %s, expected %s, got %s\n",
				glob, expect, pattern);
		free(pattern);
	} else {
		/* A failed translation must not hand out a buffer.  */
		assert(pattern == NULL);
	}
}
223
/* Test helper: compile GLOB with globcomp (no flags) and run the
 * result against STR.  Compilation has to succeed, and regexec has
 * to return EXPECT (0 for a match, REG_NOMATCH otherwise); both
 * conditions are enforced with assert.  */
static void
try_match(const char *glob, const char *str, int expect)
{
	regex_t compiled;
	int status;

	status = globcomp(&compiled, glob, 0);
	assert(status == 0);

	/* Only the verdict matters, so no sub-match array is passed.  */
	status = regexec(&compiled, str, 0, NULL, 0);
	assert(status == expect);

	regfree(&compiled);
}
234
/* Self-test driver, built only when TEST is defined.  Runs the
 * translation cases first (successful ones, then the ones expected
 * to fail), followed by the matching cases.  Always returns 0;
 * translation mismatches are reported on stderr by translate, and
 * matching failures trip the asserts in try_match.  */
int
main(void)
{
	/* Glob, expected status, expected regular expression.  */
	static const struct {
		const char *glob;
		int status;
		const char *regex;
	} translations[] = {
		{ "*", 0, ".*" },
		{ "?", 0, "." },
		{ ".*", 0, "\\..*" },
		{ "*.*", 0, ".*\\..*" },
		{ "*a*", 0, ".*a.*" },
		{ "[abc]", 0, "[abc]" },
		{ "[^abc]", 0, "[^abc]" },
		{ "[!abc]", 0, "[^abc]" },
		{ "[]]", 0, "[]]" },
		{ "[[]", 0, "[[]" },
		{ "[^]]", 0, "[^]]" },
		{ "[^a-z]", 0, "[^a-z]" },
		{ "[abc\\]]", 0, "[abc\\]]" },
		{ "[abc\\]def]", 0, "[abc\\]def]" },
		{ "[[:space:]]", 0, "[[:space:]]" },
		{ "[^[:space:]]", 0, "[^[:space:]]" },
		{ "[![:space:]]", 0, "[^[:space:]]" },
		{ "[^a-z]*", 0, "[^a-z].*" },
		{ "[^a-z]bar*", 0, "[^a-z]bar.*" },
		{ "*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.", 0,
		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\."
		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\." },

		/* Malformed patterns.  */
		{ "\\", REG_EESCAPE, NULL },
		{ "[^[:naotuh\\", REG_EBRACK, NULL },
		{ "[^[:", REG_EBRACK, NULL },
		{ "[^[", REG_EBRACK, NULL },
		{ "[^", REG_EBRACK, NULL },
		{ "[\\", REG_EBRACK, NULL },
		{ "[", REG_EBRACK, NULL },
		{ "abc[", REG_EBRACK, NULL },
	};

	/* Glob, subject string, expected regexec result.  */
	static const struct {
		const char *glob;
		const char *subject;
		int result;
	} matches[] = {
		{ "abc*def", "abc012def", 0 },
		{ "abc*def", "ab012def", REG_NOMATCH },
		{ "[abc]*def", "a1def", 0 },
		{ "[abc]*def", "b1def", 0 },
		{ "[abc]*def", "d1def", REG_NOMATCH },
	};

	size_t k;

	for (k = 0; k < sizeof translations / sizeof translations[0]; ++k)
		translate(translations[k].glob, translations[k].status,
			  translations[k].regex);

	for (k = 0; k < sizeof matches / sizeof matches[0]; ++k)
		try_match(matches[k].glob, matches[k].subject,
			  matches[k].result);

	return 0;
}
278
279#endif
280