1
2/*--------------------------------------------------------------------*/
3/*--- A simple sequence matching facility.                         ---*/
4/*---                                                 m_seqmatch.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2008-2015 OpenWorks Ltd
12      info@open-works.co.uk
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32#include "pub_core_basics.h"
33#include "pub_core_libcassert.h"
34#include "pub_core_libcbase.h"    // VG_(strlen)
35#include "pub_core_seqmatch.h"    // self
36
37/* ---------------------------------------------------------------------
38   A simple sequence matching facility
39   ------------------------------------------------------------------ */
40
41/* See detailed comment in include/pub_tool_seqmatch.h about this. */
42Bool VG_(generic_match) (
43        Bool matchAll,
44        const void* patt,  SizeT szbPatt,  UWord nPatt,  UWord ixPatt,
45        const void* input, SizeT szbInput, UWord nInput, UWord ixInput,
46        Bool (*pIsStar)(const void*),
47        Bool (*pIsQuery)(const void*),
48        Bool (*pattEQinp)(const void*,const void*,void*,UWord),
49        void* inputCompleter, Bool (*haveInputInpC)(void*,UWord)
50     )
51{
52   /* This is the spec, written in my favourite formal specification
53      language.  It specifies non-greedy matching of '*'s.
54
55      ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
56      ma ('*':ps) []     = ma ps []
57
58      ma ('?':ps) (i:is) = ma ps is
59      ma ('?':ps) []     = False
60
61      ma (p:ps)   (i:is) = p == i && ma ps is
62
63      ma (p:ps)   []     = False
64      ma []       (i:is) = False -- m-all, True for m-prefix
65      ma []       []     = True
66   */
67   Bool  havePatt, haveInput;
68   const HChar *currPatt, *currInput;
69  tailcall:
70   vg_assert(nPatt >= 0 && nPatt  < 1000000); /* arbitrary */
71   vg_assert(inputCompleter
72             || (nInput >= 0  && nInput < 1000000)); /* arbitrary */
73   vg_assert(ixPatt >= 0  && ixPatt <= nPatt);
74   vg_assert(ixInput >= 0 && (inputCompleter || ixInput <= nInput));
75
76   havePatt  = ixPatt < nPatt;
77   haveInput = inputCompleter ?
78      (*haveInputInpC)(inputCompleter, ixInput)
79      : ixInput < nInput;
80
81   /* No specific need to set NULL when !have{Patt,Input}, but guards
82      against inadvertantly dereferencing an out of range pointer to
83      the pattern or input arrays. */
84   currPatt  = havePatt  ? ((const HChar*)patt) + szbPatt * ixPatt    : NULL;
85   currInput = haveInput && !inputCompleter ?
86      ((const HChar*)input) + szbInput * ixInput : NULL;
87
88   // Deal with the complex case first: wildcards.  Do frugal
89   // matching.  When encountering a '*', first skip no characters
90   // at all, and see if the rest of the match still works.  Only if
91   // that fails do we then skip a character, and retry at the next
92   // position.
93   //
94   // ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
95   //
96   // If we're out of input, check the rest of the pattern matches
97   // the empty input.  This really means can only be be empty or
98   // composed entirely of '*'s.
99   //
100   // ma ('*':ps) []     = ma ps []
101   //
102   if (havePatt && pIsStar(currPatt)) {
103      if (haveInput) {
104         // ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
105         // we unavoidably have to make a real recursive call for the
106         // first half of the OR, since this isn't straight tail-recursion.
107         if (VG_(generic_match)( matchAll,
108                                 patt, szbPatt, nPatt,  ixPatt+1,
109                                 input,szbInput,nInput, ixInput+0,
110                                 pIsStar,pIsQuery,pattEQinp,
111                                 inputCompleter,haveInputInpC) ) {
112            return True;
113         }
114         // but we can tail-recurse for the second call
115         ixInput++; goto tailcall;
116      } else {
117         // ma ('*':ps) []     = ma ps []
118         ixPatt++; goto tailcall;
119      }
120   }
121
122   // simpler cases now.  Deal with '?' wildcards.
123   //
124   // ma ('?':ps) (i:is) = ma ps is
125   // ma ('?':ps) []     = False
126   if (havePatt && pIsQuery(currPatt)) {
127      if (haveInput) {
128         ixPatt++; ixInput++; goto tailcall;
129      } else {
130         return False;
131      }
132   }
133
134   // obvious case with literal chars in the pattern
135   //
136   // ma (p:ps)   (i:is) = p == i && ma ps is
137   if (havePatt && haveInput) {
138      if (!pattEQinp(currPatt,currInput,inputCompleter,ixInput)) return False;
139      ixPatt++; ixInput++; goto tailcall;
140   }
141
142   // if we run out of input before we run out of pattern, we must fail
143   // ma (p:ps)   []     = False
144   if (havePatt && !haveInput) return False;
145
146   // if we run out of pattern before we run out of input, the
147   // verdict depends on the matching mode.  If we are trying to
148   // match exactly (the pattern must consume the entire input)
149   // then the outcome is failure.  However, if we're merely attempting
150   // to match some prefix of the input, then we have been successful.
151   //
152   // ma []       (i:is) = False -- m-all, True for m-prefix
153   if (!havePatt && haveInput) {
154      return matchAll ? False // match-all
155                      : True; // match-prefix
156   }
157
158   // finally, if both sequence and input are both completely
159   // consumed, then we were successful, regardless of matching mode.
160   if (!havePatt && !haveInput) return True;
161
162   // end of cases
163   vg_assert(0);
164}
165
166
167/* And a parameterization of the above, to make it do
168   string matching.
169*/
170static Bool charIsStar  ( const void* pV ) { return *(const HChar*)pV == '*'; }
171static Bool charIsQuery ( const void* pV ) { return *(const HChar*)pV == '?'; }
172static Bool char_p_EQ_i ( const void* pV, const void* cV,
173                          void* null_completer, UWord ixcV ) {
174   HChar p = *(const HChar*)pV;
175   HChar c = *(const HChar*)cV;
176   vg_assert(p != '*' && p != '?');
177   return p == c;
178}
179Bool VG_(string_match) ( const HChar* patt, const HChar* input )
180{
181   return VG_(generic_match)(
182             True/* match-all */,
183             patt,  sizeof(HChar), VG_(strlen)(patt), 0,
184             input, sizeof(HChar), VG_(strlen)(input), 0,
185             charIsStar, charIsQuery, char_p_EQ_i,
186             NULL, NULL
187          );
188}
189
190
191// test cases for the matcher (in match-all mode)
192// typedef struct { char* patt; char* input; Bool xres; } Test;
193//
194//static Test tests[] =
195//  {
196//    { ""          ,""   , True },
197//    { "a"         ,""   , False },
198//    { "a"         ,"b"  , False },
199//    { "a"         ,"a"  , True },
200//    { "a"         ,"aa" , False },
201//    { "*"         ,""   , True },
202//    { "**"        ,""   , True },
203//    { "*"         ,"abc", True },
204//    { "*a"        ,"abc", False },
205//    { "*b"        ,"abc", False },
206//    { "*bc"       ,"abc", True },
207//    { "a*b"       ,"abc", False },
208//    { "a*c"       ,"abc", True },
209//    { "*c"        ,"abc", True },
210//    { "c*c"       ,"abc", False },
211//    { "abc*"      ,"abc", True },
212//    { "abc**"     ,"abc", True },
213//    { "**abc"     ,"abc", True },
214//    { "**a*b*c**" ,"abc", True },
215//    { "**a*b*d**" ,"abc", False },
216//    { "a?b"       ,"abc", False },
217//    { "a?c"       ,"abc", True },
218//    { "?"         ,""   , False },
219//    { "?"         ,"a"  , True },
220//    { "?"         ,"ab" , False },
221//    { "abcd"      ,"abc", False },
222//    { "ab"        ,"abc", False },
223//    { NULL        ,NULL , False }
224//  };
225//
226//int main ( void )
227//{
228//   Test* t;
229//   for (t = tests; t->patt; t++) {
230//     printf("%-10s %-6s  %s\n",
231//            t->patt, t->input,
232//            match_string_all((UChar*)t->patt,(UChar*)t->input,True)
233//            == t->xres
234//               ? "pass" : "FAIL" );
235//   }
236//   return 0;
237//}
238
239/*--------------------------------------------------------------------*/
240/*--- end                                             m_seqmatch.c ---*/
241/*--------------------------------------------------------------------*/
242