/* glob.c revision d435b43c338a692531af0fe3df29cb968a863403 */
/*
 * This file is part of ltrace.
 * Copyright (C) 2007,2008,2012,2013 Petr Machata, Red Hat Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 */

#include <sys/types.h>
#include <regex.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>

/* Match a character class such as "[:space:]" inside a bracket
 * expression.
 *
 * GLOB is the pattern, LENGTH its length, and FROM the index of the
 * '[' that opens the class (the caller has already seen that
 * GLOB[FROM + 1] is ':').  The class name has to be at least one
 * character long, so the search for the terminating ':' starts one
 * character past the "[:".
 *
 * Returns the index of the ']' that closes the class, or -1 if the
 * class is not terminated by ":]" within the pattern.  */
static ssize_t
match_character_class(const char *glob, size_t length, size_t from)
{
	size_t i = from + 3;

	while (i < length && glob[i] != ':')
		++i;

	/* The ':' must be found, and must be followed by ']'.  */
	if (i + 1 >= length || glob[i + 1] != ']')
		return -1;
	return (ssize_t)(i + 1);
}

/* Find the end of the bracket expression that starts at GLOB[FROM]
 * (which is '[').
 *
 * *EXCLMP is set to non-zero if the expression is complemented with
 * '!' (the glob spelling), and to zero if it is complemented with
 * '^' or not complemented at all.  It is left untouched when the
 * pattern ends right after the opening '['.
 *
 * Returns the index of the closing ']', or -1 if the bracket
 * expression is not terminated.  */
static ssize_t
match_brack(const char *glob, size_t length, size_t from, int *exclmp)
{
	size_t i = from + 1;

	if (i >= length)
		return -1;

	/* Complement operator.  */
	*exclmp = 0;
	if (glob[i] == '^' || glob[i] == '!') {
		*exclmp = glob[i++] == '!';
		if (i >= length)
			return -1;
	}

	/* On first character, both [ and ] are legal.  But when [ is
	 * followed with :, it's character class.  Reading GLOB[i + 1]
	 * is safe: at worst it is the terminating NUL.  */
	if (glob[i] == '[' && glob[i + 1] == ':') {
		ssize_t j = match_character_class(glob, length, i);
		if (j < 0)
			return -1;
		i = (size_t)j;
	}
	++i; /* skip any character, including [ or ] */

	for (; i < length; ++i) {
		if (glob[i] == '[' && glob[i + 1] == ':') {
			ssize_t j = match_character_class(glob, length, i);
			if (j < 0)
				return -1;
			i = (size_t)j;
		} else if (glob[i] == ']') {
			return (ssize_t)i;
		}
	}
	return -1;
}

/* Append STR_SIZE bytes of STR to the growable buffer *BUFP.
 *
 * *SIZEP is the number of bytes used and *ALLOCP the number of bytes
 * allocated; both are updated.  A STR_SIZE of 0 means "use
 * strlen(STR)".  The buffer is grown with realloc to twice the
 * required size when it is too small.
 *
 * Returns 0 on success, or -1 if the allocation failed, in which
 * case *BUFP, *SIZEP and *ALLOCP are unchanged.  */
static int
append(char **bufp, const char *str, size_t str_size,
       size_t *sizep, size_t *allocp)
{
	if (str_size == 0)
		str_size = strlen(str);

	size_t nsize = *sizep + str_size;
	if (nsize > *allocp) {
		size_t nalloc = nsize * 2;
		char *nbuf = realloc(*bufp, nalloc);
		if (nbuf == NULL)
			return -1;
		*allocp = nalloc;
		*bufp = nbuf;
	}

	memcpy(*bufp + *sizep, str, str_size);
	*sizep = nsize;
	return 0;
}

/* Translate the glob pattern GLOB into a regular expression.
 *
 * On success, 0 is returned and *RETP points to a NUL-terminated,
 * malloc'd string that the caller has to free.  On failure, *RETP is
 * not written and one of the following is returned:
 *   REG_ESPACE   out of memory,
 *   REG_EBRACK   unterminated bracket expression,
 *   REG_EESCAPE  pattern ends with a lone backslash.
 *
 * The produced expression is not anchored; this function adds
 * neither a leading '^' nor a trailing '$'.  */
static int
glob_to_regex(const char *glob, char **retp)
{
	size_t allocd = 0;
	size_t size = 0;
	char *buf = NULL;
	int status = REG_ESPACE;

	size_t length = strlen(glob);
	int escape = 0;
	size_t i;
	for (i = 0; i < length; ++i) {
		char c = glob[i];

		if (escape) {
			escape = 0;
			if (c == '?') {
				/* NOTE(review): an escaped '?' is emitted
				 * bare.  That is a literal only in basic
				 * regular expressions, so this presumably
				 * expects callers not to pass REG_EXTENDED
				 * -- confirm.  */
				if (append(&buf, "?", 1, &size, &allocd) < 0)
					goto fail;
			} else {
				/* Everything else, including '\\' and
				 * '*', stays backslash-escaped.  */
				char esc[2] = { '\\', c };
				if (append(&buf, esc, sizeof esc,
					   &size, &allocd) < 0)
					goto fail;
			}

		} else if (c == '\\') {
			escape = 1;

		} else if (c == '[') {
			int exclm;
			ssize_t j = match_brack(glob, length, i, &exclm);
			if (j < 0) {
				status = REG_EBRACK;
				goto fail;
			}

			/* "[!" is rewritten to "[^"; any other bracket
			 * expression is copied verbatim, closing ']'
			 * included.  */
			size_t skip = exclm ? 2 : 0;
			if (exclm
			    && append(&buf, "[^", 2, &size, &allocd) < 0)
				goto fail;
			if (append(&buf, glob + i + skip,
				   (size_t)j - i + 1 - skip,
				   &size, &allocd) < 0)
				goto fail;
			i = (size_t)j;

		} else if (c == '*') {
			if (append(&buf, ".*", 2, &size, &allocd) < 0)
				goto fail;

		} else if (c == '?') {
			if (append(&buf, ".", 1, &size, &allocd) < 0)
				goto fail;

		} else if (c == '.' || c == '^' || c == '$') {
			/* Ordinary characters in a glob, but special
			 * in a regular expression: '.' matches any
			 * character, and '^' and '$' act as anchors
			 * at the start and end of the expression.  */
			char esc[2] = { '\\', c };
			if (append(&buf, esc, sizeof esc,
				   &size, &allocd) < 0)
				goto fail;

		} else if (append(&buf, &c, 1, &size, &allocd) < 0) {
			goto fail;
		}
	}

	if (escape) {
		status = REG_EESCAPE;
		goto fail;
	}

	/* Terminating NUL.  The size has to be given explicitly,
	 * because a size of 0 would make append call strlen.  */
	if (append(&buf, "", 1, &size, &allocd) < 0)
		goto fail;

	*retp = buf;
	return 0;

fail:
	free(buf);
	return status;
}

/* Compile the glob pattern GLOB into PREG.
 *
 * The glob is first translated to a regular expression, which is
 * then passed to regcomp together with CFLAGS.  Returns 0 on
 * success, otherwise the REG_* error code of the translation or of
 * regcomp.  On success the caller releases PREG with regfree.  */
int
globcomp(regex_t *preg, const char *glob, int cflags)
{
	char *regex = NULL;
	int status = glob_to_regex(glob, &regex);
	if (status != 0)
		return status;
	assert(regex != NULL);
	status = regcomp(preg, regex, cflags);
	free(regex);
	return status;
}

#ifdef TEST
#include <stdio.h>

/* Check that GLOB translates with status EXP_STATUS and, when that
 * status is 0, into the regular expression EXPECT.  Mismatches are
 * reported on stderr.  */
static void
translate(const char *glob, int exp_status, const char *expect)
{
	char *pattern = NULL;
	int status = glob_to_regex(glob, &pattern);
	if (status != exp_status) {
		fprintf(stderr, "translating %s, expected status %d, got %d\n",
			glob, exp_status, status);
		return;
	}

	if (status == 0) {
		assert(pattern != NULL);
		if (strcmp(pattern, expect) != 0)
			fprintf(stderr, "translating %s, expected %s, got %s\n",
				glob, expect, pattern);
		free(pattern);
	} else {
		assert(pattern == NULL);
	}
}

/* Compile GLOB and check that matching it against STR yields the
 * regexec status EXPECT.  */
static void
try_match(const char *glob, const char *str, int expect)
{
	regex_t preg;
	int status = globcomp(&preg, glob, 0);
	assert(status == 0);
	status = regexec(&preg, str, 0, NULL, 0);
	assert(status == expect);
	regfree(&preg);
}

int
main(void)
{
	translate("*", 0, ".*");
	translate("?", 0, ".");
	translate(".*", 0, "\\..*");
	translate("*.*", 0, ".*\\..*");
	translate("*a*", 0, ".*a.*");
	translate("[abc]", 0, "[abc]");
	translate("[^abc]", 0, "[^abc]");
	translate("[!abc]", 0, "[^abc]");
	translate("[]]", 0, "[]]");
	translate("[[]", 0, "[[]");
	translate("[^]]", 0, "[^]]");
	translate("[^a-z]", 0, "[^a-z]");
	translate("[abc\\]]", 0, "[abc\\]]");
	translate("[abc\\]def]", 0, "[abc\\]def]");
	translate("[[:space:]]", 0, "[[:space:]]");
	translate("[^[:space:]]", 0, "[^[:space:]]");
	translate("[![:space:]]", 0, "[^[:space:]]");
	translate("[^a-z]*", 0, "[^a-z].*");
	translate("[^a-z]bar*", 0, "[^a-z]bar.*");
	translate("*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.", 0,
		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\."
		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\.");

	/* '^' and '$' are ordinary characters in a glob.  */
	translate("^", 0, "\\^");
	translate("a$", 0, "a\\$");
	translate("^a$", 0, "\\^a\\$");

	translate("\\", REG_EESCAPE, NULL);
	translate("[^[:naotuh\\", REG_EBRACK, NULL);
	translate("[^[:", REG_EBRACK, NULL);
	translate("[^[", REG_EBRACK, NULL);
	translate("[^", REG_EBRACK, NULL);
	translate("[\\", REG_EBRACK, NULL);
	translate("[", REG_EBRACK, NULL);
	translate("abc[", REG_EBRACK, NULL);

	try_match("abc*def", "abc012def", 0);
	try_match("abc*def", "ab012def", REG_NOMATCH);
	try_match("[abc]*def", "a1def", 0);
	try_match("[abc]*def", "b1def", 0);
	try_match("[abc]*def", "d1def", REG_NOMATCH);
	try_match("abc$", "abc$", 0);
	try_match("abc$", "xabc", REG_NOMATCH);
	try_match("^abc", "^abc", 0);
	try_match("^abc", "xabc", REG_NOMATCH);

	return 0;
}

#endif