/* glob.c revision 28cd84db222ce68ad13c6d37dfed5f3bb0a44784 */
/*
 * This file is part of ltrace.
 * Copyright (C) 2007, 2008, 2012 Petr Machata, Red Hat Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 */

#include <sys/types.h>
#include <regex.h>
#include <string.h>
#include <stdlib.h>

/* Find the end of a character class such as "[:space:]".
 *
 * GLOB is the pattern, LENGTH its length in bytes, and FROM the index
 * of the '[' that opens the class (the caller has already checked
 * that it is followed by ':').
 *
 * The first ':' at index FROM + 3 or later is taken as the end of the
 * class name and has to be immediately followed by ']'.  Returns the
 * index of that ']', or -1 if the class is not terminated within the
 * first LENGTH bytes.  */
static ssize_t
match_character_class(const char *glob, size_t length, size_t from)
{
	size_t i;

	/* Only look at positions that still have a successor inside
	 * the pattern, because the ':' must be followed by ']'.  */
	for (i = from + 3; i + 1 < length; ++i)
		if (glob[i] == ':')
			return glob[i + 1] == ']' ? (ssize_t)(i + 1) : -1;

	return -1;
}

/* Find the end of a bracket expression.
 *
 * GLOB is the pattern, LENGTH its length in bytes, and FROM the index
 * of the opening '['.  *EXCLMP is set to 1 if the expression is
 * complemented with '!' (which the caller has to rewrite), and to 0
 * otherwise, including when it is complemented with '^'.
 *
 * Returns the index of the closing ']', or -1 if the expression is
 * not terminated.  */
static ssize_t
match_brack(const char *glob, size_t length, size_t from, int *exclmp)
{
	size_t i = from + 1;

	*exclmp = 0;
	if (i >= length)
		return -1;

	/* Complement operator.  */
	if (glob[i] == '^' || glob[i] == '!') {
		*exclmp = glob[i++] == '!';
		if (i >= length)
			return -1;
	}

	/* On first character, both [ and ] are legal.  But when [ is
	 * followed with :, it's character class.  */
	if (glob[i] == '[' && i + 1 < length && glob[i + 1] == ':') {
		ssize_t j = match_character_class(glob, length, i);
		if (j < 0)
			return -1;
		i = j;
	}
	++i; /* skip any character, including [ or ] */

	for (; i < length; ++i) {
		char c = glob[i];

		if (c == '[' && i + 1 < length && glob[i + 1] == ':') {
			/* Jump to the ']' that ends the class; the
			 * loop increment then steps over it.  */
			ssize_t j = match_character_class(glob, length, i);
			if (j < 0)
				return -1;
			i = j;

		} else if (c == ']') {
			return i;
		}
	}
	return -1;
}

/* Append STR_SIZE bytes of STR to the growable buffer *BUFP.
 *
 * *SIZEP is the number of bytes used and *ALLOCP the number of bytes
 * allocated; both are updated.  A STR_SIZE of 0 means "use
 * strlen(STR)".
 *
 * Returns 0 on success.  Returns -1 if the buffer could not be grown,
 * in which case *BUFP, *SIZEP and *ALLOCP are left untouched.  */
static int
append(char **bufp, const char *str, size_t str_size,
       size_t *sizep, size_t *allocp)
{
	if (str_size == 0)
		str_size = strlen(str);

	size_t nsize = *sizep + str_size;
	if (nsize > *allocp) {
		/* Over-allocate so that repeated appends do not
		 * reallocate every time.  */
		size_t nalloc = nsize * 2;
		char *nbuf = realloc(*bufp, nalloc);
		if (nbuf == NULL)
			return -1;
		*allocp = nalloc;
		*bufp = nbuf;
	}

	memcpy(*bufp + *sizep, str, str_size);
	*sizep = nsize;
	return 0;
}

/* Translate the glob pattern GLOB into a regular expression.
 *
 * On success, returns 0 and stores a NUL-terminated, malloc'd string
 * in *RETP; the caller owns it and has to free it.  On failure *RETP
 * is left untouched and one of the following is returned:
 *
 *   REG_ESPACE   the output buffer could not be allocated
 *   REG_EBRACK   a bracket expression is not terminated
 *   REG_EESCAPE  the pattern ends with an unpaired backslash
 *
 * Translation rules: "*" becomes ".*", "?" becomes ".", "." becomes
 * "\.", "[!...]" becomes "[^...]", other bracket expressions are
 * copied verbatim, and any other character is copied as is.  */
static int
glob_to_regex(const char *glob, char **retp)
{
	size_t allocd = 0;
	size_t size = 0;
	char *buf = NULL;

	/* Every failure that does not set STATUS explicitly is a
	 * failed append, i.e. out of memory.  */
	int status = REG_ESPACE;

	size_t length = strlen(glob);
	int escape = 0;
	size_t i;
	for (i = 0; i < length; ++i) {
		char c = glob[i];
		int rc;

		if (escape) {
			escape = 0;
			if (c == '\\') {
				rc = append(&buf, "\\\\", 0, &size, &allocd);
			} else if (c == '*') {
				rc = append(&buf, "\\*", 0, &size, &allocd);
			} else if (c == '?') {
				/* NOTE(review): emitted without a
				 * backslash, presumably because '?'
				 * is an ordinary character in POSIX
				 * basic regular expressions; confirm
				 * for callers passing REG_EXTENDED.  */
				rc = append(&buf, "?", 0, &size, &allocd);
			} else {
				char quoted[2] = { '\\', c };
				rc = append(&buf, quoted, sizeof quoted,
					    &size, &allocd);
			}

		} else if (c == '\\') {
			escape = 1;
			continue;

		} else if (c == '[') {
			int exclm;
			ssize_t j = match_brack(glob, length, i, &exclm);
			if (j < 0) {
				status = REG_EBRACK;
				goto fail;
			}

			/* For "[!...]" emit "[^" ourselves and skip
			 * the two characters it replaces.  */
			size_t skip = exclm ? 2 : 0;
			size_t end = (size_t)j;
			if (exclm
			    && append(&buf, "[^", 2, &size, &allocd) < 0)
				goto fail;
			rc = append(&buf, glob + i + skip,
				    end - i + 1 - skip, &size, &allocd);

			/* Continue after the closing bracket.  */
			i = end;

		} else if (c == '*') {
			rc = append(&buf, ".*", 0, &size, &allocd);
		} else if (c == '?') {
			rc = append(&buf, ".", 0, &size, &allocd);
		} else if (c == '.') {
			rc = append(&buf, "\\.", 0, &size, &allocd);
		} else {
			rc = append(&buf, &c, 1, &size, &allocd);
		}

		if (rc < 0)
			goto fail;
	}

	if (escape) {
		status = REG_EESCAPE;
		goto fail;
	}

	/* Terminate the string.  The size is given explicitly, because
	 * a size of 0 would make append() copy strlen("") == 0 bytes.  */
	if (append(&buf, "", 1, &size, &allocd) < 0)
		goto fail;

	*retp = buf;
	return 0;

fail:
	free(buf);
	return status;
}

/* Compile the glob pattern GLOB into PREG.
 *
 * GLOB is translated to a regular expression, which is then passed to
 * regcomp() together with CFLAGS.  Returns 0 on success, otherwise the
 * error code of the translation or of regcomp().  On success the
 * caller releases PREG with regfree().  */
int
globcomp(regex_t *preg, const char *glob, int cflags)
{
	char *regex = NULL;
	int status = glob_to_regex(glob, &regex);
	if (status != 0)
		return status;
	status = regcomp(preg, regex, cflags);
	free(regex);
	return status;
}

#ifdef TEST
#include <assert.h>
#include <stdio.h>

/* Translate GLOB and report on stderr if the status differs from
 * EXP_STATUS or, for successful translations, if the result differs
 * from EXPECT.  */
static void
translate(const char *glob, int exp_status, const char *expect)
{
	char *pattern = NULL;
	int status = glob_to_regex(glob, &pattern);
	if (status != exp_status) {
		fprintf(stderr, "translating %s, expected status %d, got %d\n",
			glob, exp_status, status);
		/* PATTERN is set if the translation unexpectedly
		 * succeeded; free(NULL) is a no-op otherwise.  */
		free(pattern);
		return;
	}

	if (status == 0) {
		assert(pattern != NULL);
		if (strcmp(pattern, expect) != 0)
			fprintf(stderr, "translating %s, expected %s, got %s\n",
				glob, expect, pattern);
		free(pattern);
	} else {
		assert(pattern == NULL);
	}
}

/* Compile GLOB, which has to succeed, and check that matching STR
 * against it yields the regexec() status EXPECT.  */
static void
try_match(const char *glob, const char *str, int expect)
{
	regex_t preg;
	int status = globcomp(&preg, glob, 0);
	assert(status == 0);
	status = regexec(&preg, str, 0, NULL, 0);
	assert(status == expect);
	regfree(&preg);
}

int
main(void)
{
	translate("*", 0, ".*");
	translate("?", 0, ".");
	translate(".*", 0, "\\..*");
	translate("*.*", 0, ".*\\..*");
	translate("*a*", 0, ".*a.*");
	translate("[abc]", 0, "[abc]");
	translate("[^abc]", 0, "[^abc]");
	translate("[!abc]", 0, "[^abc]");
	translate("[]]", 0, "[]]");
	translate("[[]", 0, "[[]");
	translate("[^]]", 0, "[^]]");
	translate("[^a-z]", 0, "[^a-z]");
	translate("[abc\\]]", 0, "[abc\\]]");
	translate("[abc\\]def]", 0, "[abc\\]def]");
	translate("[[:space:]]", 0, "[[:space:]]");
	translate("[^[:space:]]", 0, "[^[:space:]]");
	translate("[![:space:]]", 0, "[^[:space:]]");
	translate("[^a-z]*", 0, "[^a-z].*");
	translate("[^a-z]bar*", 0, "[^a-z]bar.*");
	translate("*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.", 0,
		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\."
		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\.");

	translate("\\", REG_EESCAPE, NULL);
	translate("[^[:naotuh\\", REG_EBRACK, NULL);
	translate("[^[:", REG_EBRACK, NULL);
	translate("[^[", REG_EBRACK, NULL);
	translate("[^", REG_EBRACK, NULL);
	translate("[\\", REG_EBRACK, NULL);
	translate("[", REG_EBRACK, NULL);
	translate("abc[", REG_EBRACK, NULL);

	try_match("abc*def", "abc012def", 0);
	try_match("abc*def", "ab012def", REG_NOMATCH);
	try_match("[abc]*def", "a1def", 0);
	try_match("[abc]*def", "b1def", 0);
	try_match("[abc]*def", "d1def", REG_NOMATCH);

	return 0;
}

#endif