1/* $OpenBSD: fnmatch.c,v 1.19 2015/08/01 18:11:08 millert Exp $ */ 2 3/* Copyright (c) 2011, VMware, Inc. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are met: 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * * Neither the name of the VMware, Inc. nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE FOR 21 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* 30 * Copyright (c) 2008 Todd C. Miller <millert@openbsd.org> 31 * 32 * Permission to use, copy, modify, and distribute this software for any 33 * purpose with or without fee is hereby granted, provided that the above 34 * copyright notice and this permission notice appear in all copies. 35 * 36 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 37 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 38 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 39 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 40 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 41 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 42 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 43 */ 44 45/* Authored by William A. Rowe Jr. <wrowe; apache.org, vmware.com>, April 2011 46 * 47 * Derived from The Open Group Base Specifications Issue 7, IEEE Std 1003.1-2008 48 * as described in; 49 * http://pubs.opengroup.org/onlinepubs/9699919799/functions/fnmatch.html 50 * 51 * Filename pattern matches defined in section 2.13, "Pattern Matching Notation" 52 * from chapter 2. "Shell Command Language" 53 * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13 54 * where; 1. A bracket expression starting with an unquoted <circumflex> '^' 55 * character CONTINUES to specify a non-matching list; 2. an explicit <period> '.' 56 * in a bracket expression matching list, e.g. "[.abc]" does NOT match a leading 57 * <period> in a filename; 3. a <left-square-bracket> '[' which does not introduce 58 * a valid bracket expression is treated as an ordinary character; 4. a differing 59 * number of consecutive slashes within pattern and string will NOT match; 60 * 5. a trailing '\' in FNM_ESCAPE mode is treated as an ordinary '\' character. 61 * 62 * Bracket expansion defined in section 9.3.5, "RE Bracket Expression", 63 * from chapter 9, "Regular Expressions" 64 * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_05 65 * with no support for collating symbols, equivalence class expressions or 66 * character class expressions. A partial range expression with a leading 67 * hyphen following a valid range expression will match only the ordinary 68 * <hyphen> and the ending character (e.g. "[a-m-z]" will match characters 69 * 'a' through 'm', a <hyphen> '-', or a 'z'). 70 * 71 * Supports BSD extensions FNM_LEADING_DIR to match pattern to the end of one 72 * path segment of string, and FNM_CASEFOLD to ignore alpha case. 73 * 74 * NOTE: Only POSIX/C single byte locales are correctly supported at this time. 75 * Notably, non-POSIX locales with FNM_CASEFOLD produce undefined results, 76 * particularly in ranges of mixed case (e.g. "[A-z]") or spanning alpha and 77 * nonalpha characters within a range. 78 * 79 * XXX comments below indicate porting required for multi-byte character sets 80 * and non-POSIX locale collation orders; requires mbr* APIs to track shift 81 * state of pattern and string (rewinding pattern and string repeatedly). 82 * 83 * Certain parts of the code assume 0x00-0x3F are unique with any MBCS (e.g. 84 * UTF-8, SHIFT-JIS, etc). Any implementation allowing '\' as an alternate 85 * path delimiter must be aware that 0x5C is NOT unique within SHIFT-JIS. 86 */ 87 88#include <fnmatch.h> 89#include <string.h> 90#include <ctype.h> 91 92#include "charclass.h" 93 94#define RANGE_MATCH 1 95#define RANGE_NOMATCH 0 96#define RANGE_ERROR (-1) 97 98static int 99classmatch(const char *pattern, char test, int foldcase, const char **ep) 100{ 101 struct cclass *cc; 102 const char *colon; 103 size_t len; 104 int rval = RANGE_NOMATCH; 105 const char * const mismatch = pattern; 106 107 if (*pattern != '[' || pattern[1] != ':') { 108 *ep = mismatch; 109 return(RANGE_ERROR); 110 } 111 112 pattern += 2; 113 114 if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') { 115 *ep = mismatch; 116 return(RANGE_ERROR); 117 } 118 *ep = colon + 2; 119 len = (size_t)(colon - pattern); 120 121 if (foldcase && strncmp(pattern, "upper:]", 7) == 0) 122 pattern = "lower:]"; 123 for (cc = cclasses; cc->name != NULL; cc++) { 124 if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') { 125 if (cc->isctype((unsigned char)test)) 126 rval = RANGE_MATCH; 127 break; 128 } 129 } 130 if (cc->name == NULL) { 131 /* invalid character class, treat as normal text */ 132 *ep = mismatch; 133 rval = RANGE_ERROR; 134 } 135 return(rval); 136} 137 138/* Most MBCS/collation/case issues handled here. Wildcard '*' is not handled. 139 * EOS '\0' and the FNM_PATHNAME '/' delimiters are not advanced over, 140 * however the "\/" sequence is advanced to '/'. 141 * 142 * Both pattern and string are **char to support pointer increment of arbitrary 143 * multibyte characters for the given locale, in a later iteration of this code 144 */ 145static int fnmatch_ch(const char **pattern, const char **string, int flags) 146{ 147 const char * const mismatch = *pattern; 148 const int nocase = !!(flags & FNM_CASEFOLD); 149 const int escape = !(flags & FNM_NOESCAPE); 150 const int slash = !!(flags & FNM_PATHNAME); 151 int result = FNM_NOMATCH; 152 const char *startch; 153 int negate; 154 155 if (**pattern == '[') 156 { 157 ++*pattern; 158 159 /* Handle negation, either leading ! or ^ operators (never both) */ 160 negate = ((**pattern == '!') || (**pattern == '^')); 161 if (negate) 162 ++*pattern; 163 164 /* ']' is an ordinary character at the start of the range pattern */ 165 if (**pattern == ']') 166 goto leadingclosebrace; 167 168 while (**pattern) 169 { 170 if (**pattern == ']') { 171 ++*pattern; 172 /* XXX: Fix for MBCS character width */ 173 ++*string; 174 return (result ^ negate); 175 } 176 177 if (escape && (**pattern == '\\')) { 178 ++*pattern; 179 180 /* Patterns must be terminated with ']', not EOS */ 181 if (!**pattern) 182 break; 183 } 184 185 /* Patterns must be terminated with ']' not '/' */ 186 if (slash && (**pattern == '/')) 187 break; 188 189 /* Match character classes. */ 190 if (classmatch(*pattern, **string, nocase, pattern) 191 == RANGE_MATCH) { 192 result = 0; 193 continue; 194 } 195 if (!**pattern) 196 break; 197 198leadingclosebrace: 199 /* Look at only well-formed range patterns; 200 * "x-]" is not allowed unless escaped ("x-\]") 201 * XXX: Fix for locale/MBCS character width 202 */ 203 if (((*pattern)[1] == '-') && ((*pattern)[2] != ']')) 204 { 205 startch = *pattern; 206 *pattern += (escape && ((*pattern)[2] == '\\')) ? 3 : 2; 207 208 /* NOT a properly balanced [expr] pattern, EOS terminated 209 * or ranges containing a slash in FNM_PATHNAME mode pattern 210 * fall out to to the rewind and test '[' literal code path 211 */ 212 if (!**pattern || (slash && (**pattern == '/'))) 213 break; 214 215 /* XXX: handle locale/MBCS comparison, advance by MBCS char width */ 216 if ((**string >= *startch) && (**string <= **pattern)) 217 result = 0; 218 else if (nocase && (isupper((unsigned char)**string) || 219 isupper((unsigned char)*startch) || 220 isupper((unsigned char)**pattern)) 221 && (tolower((unsigned char)**string) >= 222 tolower((unsigned char)*startch)) 223 && (tolower((unsigned char)**string) <= 224 tolower((unsigned char)**pattern))) 225 result = 0; 226 227 ++*pattern; 228 continue; 229 } 230 231 /* XXX: handle locale/MBCS comparison, advance by MBCS char width */ 232 if ((**string == **pattern)) 233 result = 0; 234 else if (nocase && (isupper((unsigned char)**string) || 235 isupper((unsigned char)**pattern)) 236 && (tolower((unsigned char)**string) == 237 tolower((unsigned char)**pattern))) 238 result = 0; 239 240 ++*pattern; 241 } 242 243 /* NOT a properly balanced [expr] pattern; Rewind 244 * and reset result to test '[' literal 245 */ 246 *pattern = mismatch; 247 result = FNM_NOMATCH; 248 } 249 else if (**pattern == '?') { 250 /* Optimize '?' match before unescaping **pattern */ 251 if (!**string || (slash && (**string == '/'))) 252 return FNM_NOMATCH; 253 result = 0; 254 goto fnmatch_ch_success; 255 } 256 else if (escape && (**pattern == '\\') && (*pattern)[1]) { 257 ++*pattern; 258 } 259 260 /* XXX: handle locale/MBCS comparison, advance by the MBCS char width */ 261 if (**string == **pattern) 262 result = 0; 263 else if (nocase && (isupper((unsigned char)**string) || 264 isupper((unsigned char)**pattern)) 265 && (tolower((unsigned char)**string) == 266 tolower((unsigned char)**pattern))) 267 result = 0; 268 269 /* Refuse to advance over trailing slash or nulls 270 */ 271 if (!**string || !**pattern || (slash && ((**string == '/') || (**pattern == '/')))) 272 return result; 273 274fnmatch_ch_success: 275 ++*pattern; 276 ++*string; 277 return result; 278} 279 280 281int fnmatch(const char *pattern, const char *string, int flags) 282{ 283 static const char dummystring[2] = {' ', 0}; 284 const int escape = !(flags & FNM_NOESCAPE); 285 const int slash = !!(flags & FNM_PATHNAME); 286 const int leading_dir = !!(flags & FNM_LEADING_DIR); 287 const char *strendseg; 288 const char *dummyptr; 289 const char *matchptr; 290 int wild; 291 /* For '*' wild processing only; surpress 'used before initialization' 292 * warnings with dummy initialization values; 293 */ 294 const char *strstartseg = NULL; 295 const char *mismatch = NULL; 296 int matchlen = 0; 297 298 if (*pattern == '*') 299 goto firstsegment; 300 301 while (*pattern && *string) 302 { 303 /* Pre-decode "\/" which has no special significance, and 304 * match balanced slashes, starting a new segment pattern 305 */ 306 if (slash && escape && (*pattern == '\\') && (pattern[1] == '/')) 307 ++pattern; 308 if (slash && (*pattern == '/') && (*string == '/')) { 309 ++pattern; 310 ++string; 311 } 312 313firstsegment: 314 /* At the beginning of each segment, validate leading period behavior. 315 */ 316 if ((flags & FNM_PERIOD) && (*string == '.')) 317 { 318 if (*pattern == '.') 319 ++pattern; 320 else if (escape && (*pattern == '\\') && (pattern[1] == '.')) 321 pattern += 2; 322 else 323 return FNM_NOMATCH; 324 ++string; 325 } 326 327 /* Determine the end of string segment 328 * 329 * Presumes '/' character is unique, not composite in any MBCS encoding 330 */ 331 if (slash) { 332 strendseg = strchr(string, '/'); 333 if (!strendseg) 334 strendseg = strchr(string, '\0'); 335 } 336 else { 337 strendseg = strchr(string, '\0'); 338 } 339 340 /* Allow pattern '*' to be consumed even with no remaining string to match 341 */ 342 while (*pattern) 343 { 344 if ((string > strendseg) 345 || ((string == strendseg) && (*pattern != '*'))) 346 break; 347 348 if (slash && ((*pattern == '/') 349 || (escape && (*pattern == '\\') 350 && (pattern[1] == '/')))) 351 break; 352 353 /* Reduce groups of '*' and '?' to n '?' matches 354 * followed by one '*' test for simplicity 355 */ 356 for (wild = 0; ((*pattern == '*') || (*pattern == '?')); ++pattern) 357 { 358 if (*pattern == '*') { 359 wild = 1; 360 } 361 else if (string < strendseg) { /* && (*pattern == '?') */ 362 /* XXX: Advance 1 char for MBCS locale */ 363 ++string; 364 } 365 else { /* (string >= strendseg) && (*pattern == '?') */ 366 return FNM_NOMATCH; 367 } 368 } 369 370 if (wild) 371 { 372 strstartseg = string; 373 mismatch = pattern; 374 375 /* Count fixed (non '*') char matches remaining in pattern 376 * excluding '/' (or "\/") and '*' 377 */ 378 for (matchptr = pattern, matchlen = 0; 1; ++matchlen) 379 { 380 if ((*matchptr == '\0') 381 || (slash && ((*matchptr == '/') 382 || (escape && (*matchptr == '\\') 383 && (matchptr[1] == '/'))))) 384 { 385 /* Compare precisely this many trailing string chars, 386 * the resulting match needs no wildcard loop 387 */ 388 /* XXX: Adjust for MBCS */ 389 if (string + matchlen > strendseg) 390 return FNM_NOMATCH; 391 392 string = strendseg - matchlen; 393 wild = 0; 394 break; 395 } 396 397 if (*matchptr == '*') 398 { 399 /* Ensure at least this many trailing string chars remain 400 * for the first comparison 401 */ 402 /* XXX: Adjust for MBCS */ 403 if (string + matchlen > strendseg) 404 return FNM_NOMATCH; 405 406 /* Begin first wild comparison at the current position */ 407 break; 408 } 409 410 /* Skip forward in pattern by a single character match 411 * Use a dummy fnmatch_ch() test to count one "[range]" escape 412 */ 413 /* XXX: Adjust for MBCS */ 414 if (escape && (*matchptr == '\\') && matchptr[1]) { 415 matchptr += 2; 416 } 417 else if (*matchptr == '[') { 418 dummyptr = dummystring; 419 fnmatch_ch(&matchptr, &dummyptr, flags); 420 } 421 else { 422 ++matchptr; 423 } 424 } 425 } 426 427 /* Incrementally match string against the pattern 428 */ 429 while (*pattern && (string < strendseg)) 430 { 431 /* Success; begin a new wild pattern search 432 */ 433 if (*pattern == '*') 434 break; 435 436 if (slash && ((*string == '/') 437 || (*pattern == '/') 438 || (escape && (*pattern == '\\') 439 && (pattern[1] == '/')))) 440 break; 441 442 /* Compare ch's (the pattern is advanced over "\/" to the '/', 443 * but slashes will mismatch, and are not consumed) 444 */ 445 if (!fnmatch_ch(&pattern, &string, flags)) 446 continue; 447 448 /* Failed to match, loop against next char offset of string segment 449 * until not enough string chars remain to match the fixed pattern 450 */ 451 if (wild) { 452 /* XXX: Advance 1 char for MBCS locale */ 453 string = ++strstartseg; 454 if (string + matchlen > strendseg) 455 return FNM_NOMATCH; 456 457 pattern = mismatch; 458 continue; 459 } 460 else 461 return FNM_NOMATCH; 462 } 463 } 464 465 if (*string && !((slash || leading_dir) && (*string == '/'))) 466 return FNM_NOMATCH; 467 468 if (*pattern && !(slash && ((*pattern == '/') 469 || (escape && (*pattern == '\\') 470 && (pattern[1] == '/'))))) 471 return FNM_NOMATCH; 472 473 if (leading_dir && !*pattern && *string == '/') 474 return 0; 475 } 476 477 /* Where both pattern and string are at EOS, declare success 478 */ 479 if (!*string && !*pattern) 480 return 0; 481 482 /* pattern didn't match to the end of string */ 483 return FNM_NOMATCH; 484} 485