glob.c revision 28cd84db222ce68ad13c6d37dfed5f3bb0a44784
1/*
2 * This file is part of ltrace.
3 * Copyright (C) 2007, 2008, 2012 Petr Machata, Red Hat Inc.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation; either version 2 of the
8 * License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18 * 02110-1301 USA
19 */
20
21#include <sys/types.h>
22#include <regex.h>
23#include <string.h>
24#include <stdlib.h>
25
/*
 * Match a character class of the form "[:name:]" inside a bracket
 * expression.
 *
 * GLOB is the pattern, LENGTH the number of bytes of it that may be
 * inspected, and FROM the index of the '[' that opens the class (the
 * caller has already seen that GLOB[FROM + 1] is ':').
 *
 * Returns the index of the ']' that closes the class, or -1 if the
 * class is not terminated by ":]" within the first LENGTH bytes.  The
 * byte right after "[:" is never tested for ':', so an empty class
 * name is not accepted.
 */
static ssize_t
match_character_class(const char *glob, size_t length, size_t from)
{
	size_t i = from + 2;

	/* Nothing can match in an empty pattern; this also keeps
	 * LENGTH - 1 below from wrapping around.  */
	if (length == 0)
		return -1;

	/* Advance to the ':' that ends the class name.  I is bumped
	 * before the comparison, and the bound guarantees that the
	 * byte examined is still inside the pattern.  */
	while (i < length - 1 && glob[++i] != ':')
		;

	/* The closing ']' has to follow immediately, and it has to be
	 * inside the pattern as well.  */
	if (i + 1 >= length || glob[++i] != ']')
		return -1;

	return (ssize_t)i;
}
37
/*
 * Find the end of the bracket expression that starts at GLOB[FROM]
 * (which is '[').  LENGTH is the length of GLOB.
 *
 * On success returns the index of the closing ']'.  *EXCLMP is set
 * to 1 if the expression is complemented with '!' and to 0 otherwise
 * (including when it is complemented with '^').  Returns -1 if the
 * expression, or a character class nested in it, is not terminated.
 *
 * Backslash has no special meaning inside the brackets: the first
 * ']' that is neither the leading member of the set nor part of a
 * "[:class:]" ends the expression.
 */
static ssize_t
match_brack(const char *glob, size_t length, size_t from, int *exclmp)
{
	size_t i = from + 1;

	if (i >= length)
		return -1;

	/* Complement operator.  */
	*exclmp = 0;
	if (glob[i] == '^' || glob[i] == '!') {
		*exclmp = glob[i++] == '!';
		if (i >= length)
			return -1;
	}

	/* On first character, both [ and ] are legal.  But when [ is
	 * followed with :, it's character class.  */
	if (glob[i] == '[' && i + 1 < length && glob[i + 1] == ':') {
		ssize_t j = match_character_class(glob, length, i);
		if (j < 0)
			return -1;
		i = j;
	}
	++i; /* skip any character, including [ or ]  */

	for (; i < length; ++i) {
		if (glob[i] == ']')
			return i;

		if (glob[i] == '[' && i + 1 < length && glob[i + 1] == ':') {
			/* Jump over the whole class, so that the ']'
			 * closing it is not taken for the end of the
			 * bracket expression.  */
			ssize_t j = match_character_class(glob, length, i);
			if (j < 0)
				return -1;
			i = j;
		}
	}
	return -1;
}
84
/*
 * Append STR_SIZE bytes of STR to the heap buffer *BUFP.  A STR_SIZE
 * of 0 means that STR is a NUL-terminated string and all of it (but
 * not the terminating NUL) should be appended.
 *
 * *SIZEP is the number of bytes in use and *ALLOCP the number of
 * bytes allocated; both are updated, as is *BUFP when the buffer has
 * to be reallocated.  An empty buffer is represented by *BUFP ==
 * NULL with both counters set to 0.
 *
 * Returns 0 on success.  Returns -1 if memory can't be allocated or
 * the new size is not representable; the buffer and both counters
 * are left untouched in that case, and the caller still owns *BUFP.
 */
static int
append(char **bufp, const char *str, size_t str_size,
       size_t *sizep, size_t *allocp)
{
	if (str_size == 0)
		str_size = strlen(str);

	/* Appending nothing is a no-op.  Returning here also avoids
	 * handing a NULL destination to memcpy when the buffer hasn't
	 * been allocated yet.  */
	if (str_size == 0)
		return 0;

	/* Refuse sizes that would wrap around.  */
	if (str_size > (size_t)-1 - *sizep)
		return -1;
	size_t nsize = *sizep + str_size;

	if (nsize > *allocp) {
		/* Reserve twice what is needed so that a run of small
		 * appends doesn't reallocate each time, unless the
		 * doubling itself would overflow.  */
		size_t nalloc = nsize > (size_t)-1 / 2 ? nsize : nsize * 2;
		char *nbuf = realloc(*bufp, nalloc);
		if (nbuf == NULL)
			return -1;
		*allocp = nalloc;
		*bufp = nbuf;
	}

	memcpy(*bufp + *sizep, str, str_size);
	*sizep = nsize;
	return 0;
}
105
/*
 * Translate the shell-style pattern GLOB into regular expression
 * syntax.
 *
 * On success returns 0 and stores a freshly allocated, NUL-terminated
 * string in *RETP; the caller is responsible for freeing it.  On
 * failure *RETP is left alone and one of the following is returned:
 *
 *   REG_EBRACK   a bracket expression is not terminated,
 *   REG_EESCAPE  GLOB ends in an unpaired backslash,
 *   REG_ESPACE   memory ran out.
 *
 * Translation rules: unescaped "*" becomes ".*", "?" becomes "." and
 * "." becomes "\.".  Bracket expressions are copied through, with a
 * leading "[!" rewritten to "[^".  A backslash followed by "?" yields
 * a bare "?"; a backslash followed by anything else is kept together
 * with that character.  All other characters are copied verbatim.
 */
static int
glob_to_regex(const char *glob, char **retp)
{
	char *out = NULL;
	size_t used = 0;
	size_t capacity = 0;

	/* Failed appends are by far the most common way out, so that
	 * is what gets reported unless something else is set.  */
	int status = REG_ESPACE;

	const size_t glob_len = strlen(glob);
	int pending_escape = 0;

	for (size_t pos = 0; pos < glob_len; ++pos) {
		const char ch = glob[pos];
		char quoted[2] = { '\\', ch };

		/* What to emit for this step.  The default is the
		 * character itself.  */
		const char *piece = &ch;
		size_t piece_len = 1;

		if (pending_escape) {
			pending_escape = 0;
			if (ch == '?') {
				piece = "?";
			} else {
				piece = quoted;
				piece_len = 2;
			}

		} else {
			switch (ch) {
			case '\\':
				/* Decide what to emit once the next
				 * character is known.  */
				pending_escape = 1;
				continue;

			case '[': {
				int bang;
				ssize_t close = match_brack(glob, glob_len,
							    pos, &bang);
				if (close < 0) {
					status = REG_EBRACK;
					goto error;
				}

				/* "[!" is replaced by "[^"; the rest
				 * of the expression is copied as is.  */
				size_t skip = bang ? 2 : 0;
				if (bang && append(&out, "[^", 2,
						   &used, &capacity) < 0)
					goto error;

				piece = glob + pos + skip;
				piece_len = (size_t)close - pos + 1 - skip;

				/* Resume right after the closing ].  */
				pos = (size_t)close;
				break;
			}

			case '*':
				piece = ".*";
				piece_len = 2;
				break;

			case '?':
				piece = ".";
				break;

			case '.':
				piece = "\\.";
				piece_len = 2;
				break;

			default:
				break;
			}
		}

		if (append(&out, piece, piece_len, &used, &capacity) < 0)
			goto error;
	}

	/* A backslash with nothing after it.  */
	if (pending_escape) {
		status = REG_EESCAPE;
		goto error;
	}

	/* Terminate the string.  The size has to be given explicitly,
	 * because 0 would request a strlen of the empty string.  */
	if (append(&out, "", 1, &used, &capacity) < 0)
		goto error;

	*retp = out;
	return 0;

error:
	free(out);
	return status;
}
184
/*
 * Compile the shell-style pattern GLOB into PREG.  GLOB is first
 * translated by glob_to_regex and the result is handed to regcomp
 * together with CFLAGS.
 *
 * Returns 0 on success.  Otherwise returns the non-zero status of
 * whichever of the two steps failed; PREG is not compiled then.
 */
int
globcomp(regex_t *preg, const char *glob, int cflags)
{
	char *pattern;
	int rc = glob_to_regex(glob, &pattern);

	if (rc == 0) {
		rc = regcomp(preg, pattern, cflags);
		/* The translated text is no longer needed once regcomp
		 * has returned.  */
		free(pattern);
	}
	return rc;
}
196
197#ifdef TEST
198#include <assert.h>
199#include <stdio.h>
200
/*
 * Test helper: translate GLOB and check the outcome.
 *
 * EXP_STATUS is the status that glob_to_regex should return.  When it
 * is 0, EXPECT is the regular expression that should be produced;
 * otherwise EXPECT is not looked at.  Mismatches are reported on
 * stderr and do not abort the run.
 */
static void
translate(const char *glob, int exp_status, const char *expect)
{
	char *pattern = NULL;
	int status = glob_to_regex(glob, &pattern);

	if (status != exp_status) {
		fprintf(stderr, "translating %s, expected status %d, got %d\n",
			glob, exp_status, status);
		/* If the translation succeeded against expectations,
		 * there is a buffer to release.  Otherwise PATTERN is
		 * still NULL and this does nothing.  */
		free(pattern);
		return;
	}

	if (status != 0) {
		/* A failed translation must not hand out a buffer.  */
		assert(pattern == NULL);
		return;
	}

	assert(pattern != NULL);
	if (strcmp(pattern, expect) != 0)
		fprintf(stderr, "translating %s, expected %s, got %s\n",
			glob, expect, pattern);
	free(pattern);
}
222
/*
 * Test helper: compile GLOB with globcomp (no flags), run it against
 * STR and assert that regexec returns EXPECT.  Compilation itself is
 * asserted to succeed.
 */
static void
try_match(const char *glob, const char *str, int expect)
{
	regex_t compiled;
	int rc;

	rc = globcomp(&compiled, glob, 0);
	assert(rc == 0);

	rc = regexec(&compiled, str, 0, NULL, 0);
	assert(rc == expect);

	regfree(&compiled);
}
233
/*
 * Self-test entry point.  First runs the translation cases through
 * translate, then the matching cases through try_match, each in the
 * order listed.  Always returns 0.
 */
int
main(void)
{
	/* Pattern, expected status, and expected regular expression
	 * (NULL where the translation is supposed to fail).  */
	const struct {
		const char *glob;
		int status;
		const char *expect;
	} translations[] = {
		{ "*", 0, ".*" },
		{ "?", 0, "." },
		{ ".*", 0, "\\..*" },
		{ "*.*", 0, ".*\\..*" },
		{ "*a*", 0, ".*a.*" },
		{ "[abc]", 0, "[abc]" },
		{ "[^abc]", 0, "[^abc]" },
		{ "[!abc]", 0, "[^abc]" },
		{ "[]]", 0, "[]]" },
		{ "[[]", 0, "[[]" },
		{ "[^]]", 0, "[^]]" },
		{ "[^a-z]", 0, "[^a-z]" },
		{ "[abc\\]]", 0, "[abc\\]]" },
		{ "[abc\\]def]", 0, "[abc\\]def]" },
		{ "[[:space:]]", 0, "[[:space:]]" },
		{ "[^[:space:]]", 0, "[^[:space:]]" },
		{ "[![:space:]]", 0, "[^[:space:]]" },
		{ "[^a-z]*", 0, "[^a-z].*" },
		{ "[^a-z]bar*", 0, "[^a-z]bar.*" },
		{ "*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.", 0,
		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\."
		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\." },

		{ "\\", REG_EESCAPE, NULL },
		{ "[^[:naotuh\\", REG_EBRACK, NULL },
		{ "[^[:", REG_EBRACK, NULL },
		{ "[^[", REG_EBRACK, NULL },
		{ "[^", REG_EBRACK, NULL },
		{ "[\\", REG_EBRACK, NULL },
		{ "[", REG_EBRACK, NULL },
		{ "abc[", REG_EBRACK, NULL },
	};

	/* Pattern, subject string, and expected regexec result.  */
	const struct {
		const char *glob;
		const char *str;
		int expect;
	} matches[] = {
		{ "abc*def", "abc012def", 0 },
		{ "abc*def", "ab012def", REG_NOMATCH },
		{ "[abc]*def", "a1def", 0 },
		{ "[abc]*def", "b1def", 0 },
		{ "[abc]*def", "d1def", REG_NOMATCH },
	};

	size_t k;

	for (k = 0; k < sizeof translations / sizeof translations[0]; ++k)
		translate(translations[k].glob, translations[k].status,
			  translations[k].expect);

	for (k = 0; k < sizeof matches / sizeof matches[0]; ++k)
		try_match(matches[k].glob, matches[k].str,
			  matches[k].expect);

	return 0;
}
277
278#endif
279