1 2/*--------------------------------------------------------------------*/ 3/*--- A simple sequence matching facility. ---*/ 4/*--- m_seqmatch.c ---*/ 5/*--------------------------------------------------------------------*/ 6 7/* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2008-2015 OpenWorks Ltd 12 info@open-works.co.uk 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 27 02111-1307, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30*/ 31 32#include "pub_core_basics.h" 33#include "pub_core_libcassert.h" 34#include "pub_core_libcbase.h" // VG_(strlen) 35#include "pub_core_seqmatch.h" // self 36 37/* --------------------------------------------------------------------- 38 A simple sequence matching facility 39 ------------------------------------------------------------------ */ 40 41/* See detailed comment in include/pub_tool_seqmatch.h about this. */ 42Bool VG_(generic_match) ( 43 Bool matchAll, 44 const void* patt, SizeT szbPatt, UWord nPatt, UWord ixPatt, 45 const void* input, SizeT szbInput, UWord nInput, UWord ixInput, 46 Bool (*pIsStar)(const void*), 47 Bool (*pIsQuery)(const void*), 48 Bool (*pattEQinp)(const void*,const void*,void*,UWord), 49 void* inputCompleter, Bool (*haveInputInpC)(void*,UWord) 50 ) 51{ 52 /* This is the spec, written in my favourite formal specification 53 language. It specifies non-greedy matching of '*'s. 54 55 ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is 56 ma ('*':ps) [] = ma ps [] 57 58 ma ('?':ps) (i:is) = ma ps is 59 ma ('?':ps) [] = False 60 61 ma (p:ps) (i:is) = p == i && ma ps is 62 63 ma (p:ps) [] = False 64 ma [] (i:is) = False -- m-all, True for m-prefix 65 ma [] [] = True 66 */ 67 Bool havePatt, haveInput; 68 const HChar *currPatt, *currInput; 69 tailcall: 70 vg_assert(nPatt >= 0 && nPatt < 1000000); /* arbitrary */ 71 vg_assert(inputCompleter 72 || (nInput >= 0 && nInput < 1000000)); /* arbitrary */ 73 vg_assert(ixPatt >= 0 && ixPatt <= nPatt); 74 vg_assert(ixInput >= 0 && (inputCompleter || ixInput <= nInput)); 75 76 havePatt = ixPatt < nPatt; 77 haveInput = inputCompleter ? 78 (*haveInputInpC)(inputCompleter, ixInput) 79 : ixInput < nInput; 80 81 /* No specific need to set NULL when !have{Patt,Input}, but guards 82 against inadvertantly dereferencing an out of range pointer to 83 the pattern or input arrays. */ 84 currPatt = havePatt ? ((const HChar*)patt) + szbPatt * ixPatt : NULL; 85 currInput = haveInput && !inputCompleter ? 86 ((const HChar*)input) + szbInput * ixInput : NULL; 87 88 // Deal with the complex case first: wildcards. Do frugal 89 // matching. When encountering a '*', first skip no characters 90 // at all, and see if the rest of the match still works. Only if 91 // that fails do we then skip a character, and retry at the next 92 // position. 93 // 94 // ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is 95 // 96 // If we're out of input, check the rest of the pattern matches 97 // the empty input. This really means can only be be empty or 98 // composed entirely of '*'s. 99 // 100 // ma ('*':ps) [] = ma ps [] 101 // 102 if (havePatt && pIsStar(currPatt)) { 103 if (haveInput) { 104 // ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is 105 // we unavoidably have to make a real recursive call for the 106 // first half of the OR, since this isn't straight tail-recursion. 107 if (VG_(generic_match)( matchAll, 108 patt, szbPatt, nPatt, ixPatt+1, 109 input,szbInput,nInput, ixInput+0, 110 pIsStar,pIsQuery,pattEQinp, 111 inputCompleter,haveInputInpC) ) { 112 return True; 113 } 114 // but we can tail-recurse for the second call 115 ixInput++; goto tailcall; 116 } else { 117 // ma ('*':ps) [] = ma ps [] 118 ixPatt++; goto tailcall; 119 } 120 } 121 122 // simpler cases now. Deal with '?' wildcards. 123 // 124 // ma ('?':ps) (i:is) = ma ps is 125 // ma ('?':ps) [] = False 126 if (havePatt && pIsQuery(currPatt)) { 127 if (haveInput) { 128 ixPatt++; ixInput++; goto tailcall; 129 } else { 130 return False; 131 } 132 } 133 134 // obvious case with literal chars in the pattern 135 // 136 // ma (p:ps) (i:is) = p == i && ma ps is 137 if (havePatt && haveInput) { 138 if (!pattEQinp(currPatt,currInput,inputCompleter,ixInput)) return False; 139 ixPatt++; ixInput++; goto tailcall; 140 } 141 142 // if we run out of input before we run out of pattern, we must fail 143 // ma (p:ps) [] = False 144 if (havePatt && !haveInput) return False; 145 146 // if we run out of pattern before we run out of input, the 147 // verdict depends on the matching mode. If we are trying to 148 // match exactly (the pattern must consume the entire input) 149 // then the outcome is failure. However, if we're merely attempting 150 // to match some prefix of the input, then we have been successful. 151 // 152 // ma [] (i:is) = False -- m-all, True for m-prefix 153 if (!havePatt && haveInput) { 154 return matchAll ? False // match-all 155 : True; // match-prefix 156 } 157 158 // finally, if both sequence and input are both completely 159 // consumed, then we were successful, regardless of matching mode. 160 if (!havePatt && !haveInput) return True; 161 162 // end of cases 163 vg_assert(0); 164} 165 166 167/* And a parameterization of the above, to make it do 168 string matching. 169*/ 170static Bool charIsStar ( const void* pV ) { return *(const HChar*)pV == '*'; } 171static Bool charIsQuery ( const void* pV ) { return *(const HChar*)pV == '?'; } 172static Bool char_p_EQ_i ( const void* pV, const void* cV, 173 void* null_completer, UWord ixcV ) { 174 HChar p = *(const HChar*)pV; 175 HChar c = *(const HChar*)cV; 176 vg_assert(p != '*' && p != '?'); 177 return p == c; 178} 179Bool VG_(string_match) ( const HChar* patt, const HChar* input ) 180{ 181 return VG_(generic_match)( 182 True/* match-all */, 183 patt, sizeof(HChar), VG_(strlen)(patt), 0, 184 input, sizeof(HChar), VG_(strlen)(input), 0, 185 charIsStar, charIsQuery, char_p_EQ_i, 186 NULL, NULL 187 ); 188} 189 190 191// test cases for the matcher (in match-all mode) 192// typedef struct { char* patt; char* input; Bool xres; } Test; 193// 194//static Test tests[] = 195// { 196// { "" ,"" , True }, 197// { "a" ,"" , False }, 198// { "a" ,"b" , False }, 199// { "a" ,"a" , True }, 200// { "a" ,"aa" , False }, 201// { "*" ,"" , True }, 202// { "**" ,"" , True }, 203// { "*" ,"abc", True }, 204// { "*a" ,"abc", False }, 205// { "*b" ,"abc", False }, 206// { "*bc" ,"abc", True }, 207// { "a*b" ,"abc", False }, 208// { "a*c" ,"abc", True }, 209// { "*c" ,"abc", True }, 210// { "c*c" ,"abc", False }, 211// { "abc*" ,"abc", True }, 212// { "abc**" ,"abc", True }, 213// { "**abc" ,"abc", True }, 214// { "**a*b*c**" ,"abc", True }, 215// { "**a*b*d**" ,"abc", False }, 216// { "a?b" ,"abc", False }, 217// { "a?c" ,"abc", True }, 218// { "?" ,"" , False }, 219// { "?" ,"a" , True }, 220// { "?" ,"ab" , False }, 221// { "abcd" ,"abc", False }, 222// { "ab" ,"abc", False }, 223// { NULL ,NULL , False } 224// }; 225// 226//int main ( void ) 227//{ 228// Test* t; 229// for (t = tests; t->patt; t++) { 230// printf("%-10s %-6s %s\n", 231// t->patt, t->input, 232// match_string_all((UChar*)t->patt,(UChar*)t->input,True) 233// == t->xres 234// ? "pass" : "FAIL" ); 235// } 236// return 0; 237//} 238 239/*--------------------------------------------------------------------*/ 240/*--- end m_seqmatch.c ---*/ 241/*--------------------------------------------------------------------*/ 242