1/*
2 * This file is part of ltrace.
3 * Copyright (C) 2007,2008,2012,2013 Petr Machata, Red Hat Inc.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation; either version 2 of the
8 * License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18 * 02110-1301 USA
19 */
20
21#include <sys/types.h>
22#include <regex.h>
23#include <string.h>
24#include <stdlib.h>
25#include <assert.h>
26
/* Locate the end of a character class such as "[:alpha:]".
 *
 * GLOB is the pattern, LENGTH the number of characters in it, and
 * FROM the index of the '[' that opens the class (the caller has
 * already seen the ':' that follows it).
 *
 * Returns the index of the closing ':' -- the first ':' found after
 * the opening "[:" -- provided that it is directly followed by ']'
 * inside the pattern.  Returns -1 otherwise.  */
static ssize_t
match_character_class(const char *glob, size_t length, size_t from)
{
	assert(length > 0);

	/* Skip the opening "[:".  */
	size_t start = from + 2;
	if (start >= length)
		return -1;

	/* Search only what is left of the pattern; the range has to
	 * shrink by the offset we start at, or we would read past the
	 * end of GLOB.  */
	const char *colon = memchr(glob + start, ':', length - start);
	if (colon == NULL)
		return -1;

	size_t pos = (size_t)(colon - glob);
	if (pos + 1 >= length || colon[1] != ']')
		return -1;
	return (ssize_t)pos;
}

/* Find the ']' that terminates the bracket expression whose opening
 * '[' is at index FROM of GLOB (LENGTH characters long).
 *
 * Returns the index of the terminating ']', or -1 if the expression
 * is unterminated or contains a malformed character class.
 *
 * Once the opening '[' is known to be followed by at least one more
 * character, *EXCLMP is set to 1 if the expression is complemented
 * with '!' and to 0 otherwise (that includes complementing with '^').
 * It is left untouched when FROM is the last character of GLOB.  */
static ssize_t
match_brack(const char *glob, size_t length, size_t from, int *exclmp)
{
	size_t i = from + 1;

	if (i >= length)
		return -1;

	/* Complement operator.  */
	*exclmp = 0;
	if (glob[i] == '^' || glob[i] == '!') {
		*exclmp = glob[i++] == '!';
		if (i >= length)
			return -1;
	}

	/* On first character, both [ and ] are legal.  But when [ is
	 * followed with :, it's character class.  */
	if (glob[i] == '[' && i + 1 < length && glob[i + 1] == ':') {
		ssize_t j = match_character_class(glob, length, i);
		if (j < 0)
			return -1;
		/* J is the closing ':'.  Move onto the ']' that ends
		 * the class, so that the increment below steps over
		 * it instead of mistaking it for the terminator.  */
		i = (size_t)j + 1;
	}
	++i; /* skip any character, including [ or ]  */

	for (; i < length; ++i) {
		char c = glob[i];
		if (c == '[' && i + 1 < length && glob[i + 1] == ':') {
			ssize_t j = match_character_class(glob, length, i);
			if (j < 0)
				return -1;
			/* As above: the loop increment skips the ']'
			 * that belongs to the class.  */
			i = (size_t)j + 1;

		} else if (c == ']') {
			return (ssize_t)i;
		}
	}
	return -1;
}
78
/* Append bytes of STR to the growable buffer *BUFP.
 *
 * STR_SIZE is the number of bytes to copy; when it is 0, STR is
 * taken to be NUL-terminated and strlen(STR) bytes are copied.
 * *SIZEP is the number of bytes in use and *ALLOCP the allocated
 * capacity; both are updated.  When the buffer is too small it is
 * reallocated to twice the required size.
 *
 * Returns 0 on success, or -1 if reallocation failed, in which case
 * *BUFP, *SIZEP and *ALLOCP are left unchanged.  */
static int
append(char **bufp, const char *str, size_t str_size,
       size_t *sizep, size_t *allocp)
{
	size_t count = str_size != 0 ? str_size : strlen(str);
	size_t used = *sizep;
	size_t needed = used + count;

	if (needed > *allocp) {
		size_t capacity = needed * 2;
		char *grown = realloc(*bufp, capacity);
		if (grown == NULL)
			return -1;
		*bufp = grown;
		*allocp = capacity;
	}

	memcpy(*bufp + used, str, count);
	*sizep = needed;
	return 0;
}
99
/* Translate the glob pattern GLOB into a regular expression.
 *
 * On success returns 0 and stores a freshly allocated, NUL-terminated
 * pattern in *RETP; the caller releases it with free().  On failure
 * *RETP is left untouched and one of these is returned:
 *   REG_ESPACE   the output buffer could not be grown,
 *   REG_EBRACK   a '[' has no matching ']',
 *   REG_EESCAPE  GLOB ends with an unpaired backslash.  */
static int
glob_to_regex(const char *glob, char **retp)
{
	char *out = NULL;
	size_t used = 0;
	size_t capacity = 0;

	/* Every failed append() is an allocation failure; the two
	 * syntax errors override this just before bailing out.  */
	int status = REG_ESPACE;

	const size_t length = strlen(glob);
	int escaped = 0;

	for (size_t pos = 0; pos < length; ++pos) {
		const char c = glob[pos];
		int rc;

		if (escaped) {
			/* Character following a backslash.  */
			escaped = 0;
			switch (c) {
			case '\\':
				rc = append(&out, "\\\\", 0, &used, &capacity);
				break;
			case '*':
				rc = append(&out, "\\*", 0, &used, &capacity);
				break;
			case '?':
				rc = append(&out, "?", 0, &used, &capacity);
				break;
			default: {
				/* Keep the backslash in the output.  */
				const char pair[2] = { '\\', c };
				rc = append(&out, pair, 2, &used, &capacity);
				break;
			}
			}
			if (rc < 0)
				goto error;
			continue;
		}

		switch (c) {
		case '\\':
			escaped = 1;
			continue;

		case '[': {
			int negated;
			ssize_t end = match_brack(glob, length, pos, &negated);
			if (end < 0) {
				status = REG_EBRACK;
				goto error;
			}
			/* A set opened with "[!" is emitted as "[^";
			 * the rest is copied over verbatim.  */
			if (negated
			    && append(&out, "[^", 2, &used, &capacity) < 0)
				goto error;
			rc = append(&out, glob + pos + 2*negated,
				    end - pos + 1 - 2*negated,
				    &used, &capacity);
			/* Resume scanning after the closing ']'.  */
			pos = end;
			break;
		}

		case '*':
			rc = append(&out, ".*", 0, &used, &capacity);
			break;

		case '?':
			rc = append(&out, ".", 0, &used, &capacity);
			break;

		case '.':
			rc = append(&out, "\\.", 0, &used, &capacity);
			break;

		default:
			rc = append(&out, &c, 1, &used, &capacity);
			break;
		}

		if (rc < 0)
			goto error;
	}

	if (escaped) {
		status = REG_EESCAPE;
		goto error;
	}

	/* Terminate the pattern; the size of 1 makes append() copy
	 * the NUL byte itself.  */
	if (append(&out, "", 1, &used, &capacity) < 0)
		goto error;

	*retp = out;
	return 0;

error:
	free(out);
	return status;
}
178
/* Compile the glob pattern GLOB into PREG.
 *
 * GLOB is first translated to a regular expression, which is then
 * handed to regcomp() together with CFLAGS.  Returns 0 on success;
 * otherwise the non-zero status of whichever step failed, in which
 * case, if the translation failed, PREG has not been touched.  */
int
globcomp(regex_t *preg, const char *glob, int cflags)
{
	char *pattern = NULL;
	int rc = glob_to_regex(glob, &pattern);

	if (rc == 0) {
		assert(pattern != NULL);
		rc = regcomp(preg, pattern, cflags);
		/* regcomp() is done with the text once it returns.  */
		free(pattern);
	}
	return rc;
}
191
192#ifdef TEST
193#include <stdio.h>
194
/* Test helper: translate GLOB and compare the outcome with what is
 * expected.
 *
 * EXP_STATUS is the status that glob_to_regex() should return and
 * EXPECT the pattern it should produce; EXPECT is only looked at
 * when the translation succeeds.  Mismatches are reported on stderr
 * and do not abort the run.  */
static void
translate(const char *glob, int exp_status, const char *expect)
{
	char *pattern = NULL;
	int status = glob_to_regex(glob, &pattern);
	if (status != exp_status) {
		fprintf(stderr, "translating %s, expected status %d, got %d\n",
			glob, exp_status, status);
		/* A translation that succeeded unexpectedly has still
		 * handed us a buffer; PATTERN is NULL otherwise.  */
		free(pattern);
		return;
	}

	if (status == 0) {
		assert(pattern != NULL);
		if (strcmp(pattern, expect) != 0)
			fprintf(stderr, "translating %s, expected %s, got %s\n",
				glob, expect, pattern);
		free(pattern);
	} else {
		/* glob_to_regex() stores nothing on failure.  */
		assert(pattern == NULL);
	}
}
216
/* Test helper: compile GLOB with globcomp() (no flags) and run the
 * result against STR.  Compilation has to succeed, and regexec()
 * has to return EXPECT; both are checked with assert().  */
static void
try_match(const char *glob, const char *str, int expect)
{
	regex_t compiled;

	int result = globcomp(&compiled, glob, 0);
	assert(result == 0);

	/* No sub-match information is requested.  */
	result = regexec(&compiled, str, 0, NULL, 0);
	assert(result == expect);

	regfree(&compiled);
}
227
/* Self-test entry point: runs every translation case through
 * translate() and then every matching case through try_match(),
 * in table order.  Always returns 0.  */
int
main(void)
{
	/* Glob, expected status, expected pattern (NULL on failure).  */
	static const struct {
		const char *glob;
		int status;
		const char *regex;
	} translations[] = {
		{ "*", 0, ".*" },
		{ "?", 0, "." },
		{ ".*", 0, "\\..*" },
		{ "*.*", 0, ".*\\..*" },
		{ "*a*", 0, ".*a.*" },
		{ "[abc]", 0, "[abc]" },
		{ "[^abc]", 0, "[^abc]" },
		{ "[!abc]", 0, "[^abc]" },
		{ "[]]", 0, "[]]" },
		{ "[[]", 0, "[[]" },
		{ "[^]]", 0, "[^]]" },
		{ "[^a-z]", 0, "[^a-z]" },
		{ "[abc\\]]", 0, "[abc\\]]" },
		{ "[abc\\]def]", 0, "[abc\\]def]" },
		{ "[[:space:]]", 0, "[[:space:]]" },
		{ "[^[:space:]]", 0, "[^[:space:]]" },
		{ "[![:space:]]", 0, "[^[:space:]]" },
		{ "[^a-z]*", 0, "[^a-z].*" },
		{ "[^a-z]bar*", 0, "[^a-z]bar.*" },
		{ "*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.", 0,
		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\."
		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\." },

		/* Malformed globs.  */
		{ "\\", REG_EESCAPE, NULL },
		{ "[^[:naotuh\\", REG_EBRACK, NULL },
		{ "[^[:", REG_EBRACK, NULL },
		{ "[^[", REG_EBRACK, NULL },
		{ "[^", REG_EBRACK, NULL },
		{ "[\\", REG_EBRACK, NULL },
		{ "[", REG_EBRACK, NULL },
		{ "abc[", REG_EBRACK, NULL },
	};

	/* Glob, subject string, expected regexec() result.  */
	static const struct {
		const char *glob;
		const char *subject;
		int result;
	} matches[] = {
		{ "abc*def", "abc012def", 0 },
		{ "abc*def", "ab012def", REG_NOMATCH },
		{ "[abc]*def", "a1def", 0 },
		{ "[abc]*def", "b1def", 0 },
		{ "[abc]*def", "d1def", REG_NOMATCH },
	};

	size_t k;

	for (k = 0; k < sizeof translations / sizeof translations[0]; ++k)
		translate(translations[k].glob, translations[k].status,
			  translations[k].regex);

	for (k = 0; k < sizeof matches / sizeof matches[0]; ++k)
		try_match(matches[k].glob, matches[k].subject,
			  matches[k].result);

	return 0;
}
271
272#endif
273