/* glob.c revision b6c5c8c51f954cfbe76424fd57c33a87166f0545 */
/*
 * This file is part of ltrace.
 * Copyright (C) 2007, 2008, 2012 Petr Machata, Red Hat Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 */

#include <sys/types.h>
#include <regex.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>

/* Locate the end of a character class such as "[:space:]" whose
 * opening '[' is at GLOB[FROM].  LENGTH is the length of GLOB, which
 * must be NUL-terminated because the byte at GLOB[LENGTH] may be
 * inspected.
 *
 * Returns the index of the ']' that follows the closing ':', or -1
 * if no ":]" terminator is found.  Because of the pre-increment in
 * the scan, the first byte after "[:" is never itself tested for
 * ':', so an empty class name is rejected.  */
static ssize_t
match_character_class(const char *glob, size_t length, size_t from)
{
	/* Nothing to scan, and LENGTH - 1 below would wrap around.  */
	if (length == 0)
		return -1;

	size_t i = from + 2;
	while (i < length - 1 && glob[++i] != ':')
		;

	if (i >= length || glob[++i] != ']')
		return -1;
	return i;
}

/* Locate the ']' that terminates the bracket expression whose opening
 * '[' is at GLOB[FROM].  LENGTH is the length of GLOB, which must be
 * NUL-terminated because GLOB[i + 1] is peeked at.
 *
 * *EXCLMP is set to 1 when the expression is complemented with '!'
 * (the caller then has to rewrite it to '^'), and to 0 when it is
 * complemented with '^' or not complemented at all.  It is left
 * untouched when the glob ends right after the opening '['.
 *
 * Backslash has no special meaning here: the scan stops at the first
 * ']' that is neither the first member of the set nor part of a
 * character class.
 *
 * Returns the index of the closing ']', or -1 if the expression is
 * not terminated.  */
static ssize_t
match_brack(const char *glob, size_t length, size_t from, int *exclmp)
{
	size_t i = from + 1;

	if (i >= length)
		return -1;

	/* Complement operator.  */
	*exclmp = 0;
	if (glob[i] == '^' || glob[i] == '!') {
		*exclmp = glob[i++] == '!';
		if (i >= length)
			return -1;
	}

	/* On first character, both [ and ] are legal.  But when [ is
	 * followed with :, it's character class.  */
	if (glob[i] == '[' && glob[i + 1] == ':') {
		ssize_t j = match_character_class(glob, length, i);
		if (j < 0)
			return -1;
		i = j;
	}
	++i; /* skip any character, including [ or ]  */

	for (; i < length; ++i) {
		char c = glob[i];
		if (c == '[' && glob[i + 1] == ':') {
			/* Jump to the class's closing ']'; the loop
			 * increment then steps past it.  */
			ssize_t j = match_character_class(glob, length, i);
			if (j < 0)
				return -1;
			i = j;

		} else if (c == ']') {
			return i;
		}
	}
	return -1;
}

/* Append STR_SIZE bytes of STR to the growable buffer *BUFP.  A
 * STR_SIZE of 0 means "use strlen(STR)".  *SIZEP is the number of
 * bytes used so far and *ALLOCP the number of bytes allocated; both
 * are updated.  The buffer is grown with realloc to twice the needed
 * size whenever it is too small.
 *
 * Returns 0 on success.  Returns -1 if realloc fails, in which case
 * *BUFP, *SIZEP and *ALLOCP are left unchanged.  */
static int
append(char **bufp, const char *str, size_t str_size,
       size_t *sizep, size_t *allocp)
{
	if (str_size == 0)
		str_size = strlen(str);

	size_t nsize = *sizep + str_size;
	if (nsize > *allocp) {
		size_t nalloc = nsize * 2;
		char *nbuf = realloc(*bufp, nalloc);
		if (nbuf == NULL)
			return -1;
		*allocp = nalloc;
		*bufp = nbuf;
	}

	memcpy(*bufp + *sizep, str, str_size);
	*sizep = nsize;
	return 0;
}

/* Translate the shell-style pattern GLOB into a regular expression.
 *
 *   *        becomes  .*
 *   ?        becomes  .
 *   .        becomes  \.
 *   [...]    is copied verbatim, except that a leading "[!" is
 *            rewritten to "[^"
 *   \\ \*    are copied verbatim
 *   \?       becomes  ?
 *   \c       (any other character) is copied verbatim
 *
 * Every other character is copied as is.
 *
 * NOTE(review): an escaped '?' is emitted bare, which is a literal
 * only in POSIX basic regular expression syntax -- confirm that
 * callers of globcomp do not pass REG_EXTENDED.
 *
 * On success returns 0 and stores a NUL-terminated, heap-allocated
 * string in *RETP; the caller releases it with free.  On failure
 * *RETP is left untouched and one of these is returned:
 *   REG_ESPACE   memory allocation failed
 *   REG_EBRACK   a bracket expression is not terminated
 *   REG_EESCAPE  GLOB ends with a lone backslash  */
static int
glob_to_regex(const char *glob, char **retp)
{
	size_t allocd = 0;
	size_t size = 0;
	char *buf = NULL;

	size_t length = strlen(glob);
	int escape = 0;
	size_t i;
	for (i = 0; i < length; ++i) {
		char c = glob[i];
		int rc = 0;

		if (escape) {
			/* Previous character was a backslash.  */
			if (c == '\\')
				rc = append(&buf, "\\\\", 0, &size, &allocd);
			else if (c == '*')
				rc = append(&buf, "\\*", 0, &size, &allocd);
			else if (c == '?')
				rc = append(&buf, "?", 0, &size, &allocd);
			else
				rc = append(&buf, (char[]){ '\\', c }, 2,
					    &size, &allocd);
			escape = 0;

		} else if (c == '\\') {
			escape = 1;

		} else if (c == '[') {
			int exclm;
			ssize_t j = match_brack(glob, length, i, &exclm);
			if (j < 0) {
				free(buf);
				return REG_EBRACK;
			}

			/* For "[!" emit "[^" ourselves and skip those
			 * two bytes of the input; otherwise the whole
			 * expression is copied including the '['.  */
			if (exclm)
				rc = append(&buf, "[^", 2, &size, &allocd);
			if (rc == 0)
				rc = append(&buf, glob + i + 2*exclm,
					    j - i + 1 - 2*exclm,
					    &size, &allocd);
			i = j;

		} else if (c == '*') {
			rc = append(&buf, ".*", 0, &size, &allocd);
		} else if (c == '?') {
			rc = append(&buf, ".", 0, &size, &allocd);
		} else if (c == '.') {
			rc = append(&buf, "\\.", 0, &size, &allocd);
		} else {
			rc = append(&buf, &c, 1, &size, &allocd);
		}

		if (rc < 0)
			goto fail;
	}

	if (escape) {
		free(buf);
		return REG_EESCAPE;
	}

	{
		/* Terminate the string.  The size has to be passed
		 * explicitly, because 0 would mean "use strlen".  */
		char c = 0;
		if (append(&buf, &c, 1, &size, &allocd) < 0)
			goto fail;
	}
	*retp = buf;
	return 0;

fail:
	free(buf);
	return REG_ESPACE;
}

/* Compile the shell-style pattern GLOB into PREG.  GLOB is first
 * translated by glob_to_regex and the result handed to regcomp
 * together with CFLAGS.
 *
 * Returns 0 on success, otherwise the error code of glob_to_regex or
 * of regcomp.  */
int
globcomp(regex_t *preg, const char *glob, int cflags)
{
	char *regex = NULL;
	int status = glob_to_regex(glob, &regex);
	if (status != 0)
		return status;
	assert(regex != NULL);
	status = regcomp(preg, regex, cflags);
	free(regex);
	return status;
}

#ifdef TEST
#include <stdio.h>

/* Translate GLOB and report on stderr if either the status differs
 * from EXP_STATUS or, on success, the regex differs from EXPECT.  */
static void
translate(const char *glob, int exp_status, const char *expect)
{
	char *pattern = NULL;
	int status = glob_to_regex(glob, &pattern);
	if (status != exp_status) {
		fprintf(stderr, "translating %s, expected status %d, got %d\n",
			glob, exp_status, status);
		/* PATTERN is set when the translation unexpectedly
		 * succeeded, and NULL otherwise.  */
		free(pattern);
		return;
	}

	if (status == 0) {
		assert(pattern != NULL);
		if (strcmp(pattern, expect) != 0)
			fprintf(stderr, "translating %s, expected %s, got %s\n",
				glob, expect, pattern);
		free(pattern);
	} else {
		assert(pattern == NULL);
	}
}

/* Compile GLOB, match it against STR and assert that regexec
 * returned EXPECT.  */
static void
try_match(const char *glob, const char *str, int expect)
{
	regex_t preg;
	int status = globcomp(&preg, glob, 0);
	assert(status == 0);
	status = regexec(&preg, str, 0, NULL, 0);
	assert(status == expect);
	regfree(&preg);
}

int
main(void)
{
	translate("*", 0, ".*");
	translate("?", 0, ".");
	translate(".*", 0, "\\..*");
	translate("*.*", 0, ".*\\..*");
	translate("*a*", 0, ".*a.*");
	translate("[abc]", 0, "[abc]");
	translate("[^abc]", 0, "[^abc]");
	translate("[!abc]", 0, "[^abc]");
	translate("[]]", 0, "[]]");
	translate("[[]", 0, "[[]");
	translate("[^]]", 0, "[^]]");
	translate("[^a-z]", 0, "[^a-z]");
	translate("[abc\\]]", 0, "[abc\\]]");
	translate("[abc\\]def]", 0, "[abc\\]def]");
	translate("[[:space:]]", 0, "[[:space:]]");
	translate("[^[:space:]]", 0, "[^[:space:]]");
	translate("[![:space:]]", 0, "[^[:space:]]");
	translate("[^a-z]*", 0, "[^a-z].*");
	translate("[^a-z]bar*", 0, "[^a-z]bar.*");
	translate("*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.", 0,
		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\."
		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\.");

	translate("\\", REG_EESCAPE, NULL);
	translate("[^[:naotuh\\", REG_EBRACK, NULL);
	translate("[^[:", REG_EBRACK, NULL);
	translate("[^[", REG_EBRACK, NULL);
	translate("[^", REG_EBRACK, NULL);
	translate("[\\", REG_EBRACK, NULL);
	translate("[", REG_EBRACK, NULL);
	translate("abc[", REG_EBRACK, NULL);

	try_match("abc*def", "abc012def", 0);
	try_match("abc*def", "ab012def", REG_NOMATCH);
	try_match("[abc]*def", "a1def", 0);
	try_match("[abc]*def", "b1def", 0);
	try_match("[abc]*def", "d1def", REG_NOMATCH);

	return 0;
}

#endif