1/* stringlib: fastsearch implementation */
2
3#ifndef STRINGLIB_FASTSEARCH_H
4#define STRINGLIB_FASTSEARCH_H
5
6/* fast search/count implementation, based on a mix between boyer-
7   moore and horspool, with a few more bells and whistles on the top.
8   for some more background, see: http://effbot.org/zone/stringlib.htm */
9
10/* note: fastsearch may access s[n], which isn't a problem when using
11   Python's ordinary string types, but may cause problems if you're
12   using this code in other contexts.  also, the count mode returns -1
13   if there cannot possible be a match in the target string, and 0 if
14   it has actually checked for matches, but didn't find any.  callers
15   beware! */
16
17#define FAST_COUNT 0
18#define FAST_SEARCH 1
19#define FAST_RSEARCH 2
20
21#if LONG_BIT >= 128
22#define STRINGLIB_BLOOM_WIDTH 128
23#elif LONG_BIT >= 64
24#define STRINGLIB_BLOOM_WIDTH 64
25#elif LONG_BIT >= 32
26#define STRINGLIB_BLOOM_WIDTH 32
27#else
28#error "LONG_BIT is smaller than 32"
29#endif
30
31#define STRINGLIB_BLOOM_ADD(mask, ch) \
32    ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
33#define STRINGLIB_BLOOM(mask, ch)     \
34    ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
35
36Py_LOCAL_INLINE(Py_ssize_t)
37fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
38           const STRINGLIB_CHAR* p, Py_ssize_t m,
39           Py_ssize_t maxcount, int mode)
40{
41    unsigned long mask;
42    Py_ssize_t skip, count = 0;
43    Py_ssize_t i, j, mlast, w;
44
45    w = n - m;
46
47    if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
48        return -1;
49
50    /* look for special cases */
51    if (m <= 1) {
52        if (m <= 0)
53            return -1;
54        /* use special case for 1-character strings */
55        if (mode == FAST_COUNT) {
56            for (i = 0; i < n; i++)
57                if (s[i] == p[0]) {
58                    count++;
59                    if (count == maxcount)
60                        return maxcount;
61                }
62            return count;
63        } else if (mode == FAST_SEARCH) {
64            for (i = 0; i < n; i++)
65                if (s[i] == p[0])
66                    return i;
67        } else {    /* FAST_RSEARCH */
68            for (i = n - 1; i > -1; i--)
69                if (s[i] == p[0])
70                    return i;
71        }
72        return -1;
73    }
74
75    mlast = m - 1;
76    skip = mlast - 1;
77    mask = 0;
78
79    if (mode != FAST_RSEARCH) {
80
81        /* create compressed boyer-moore delta 1 table */
82
83        /* process pattern[:-1] */
84        for (i = 0; i < mlast; i++) {
85            STRINGLIB_BLOOM_ADD(mask, p[i]);
86            if (p[i] == p[mlast])
87                skip = mlast - i - 1;
88        }
89        /* process pattern[-1] outside the loop */
90        STRINGLIB_BLOOM_ADD(mask, p[mlast]);
91
92        for (i = 0; i <= w; i++) {
93            /* note: using mlast in the skip path slows things down on x86 */
94            if (s[i+m-1] == p[m-1]) {
95                /* candidate match */
96                for (j = 0; j < mlast; j++)
97                    if (s[i+j] != p[j])
98                        break;
99                if (j == mlast) {
100                    /* got a match! */
101                    if (mode != FAST_COUNT)
102                        return i;
103                    count++;
104                    if (count == maxcount)
105                        return maxcount;
106                    i = i + mlast;
107                    continue;
108                }
109                /* miss: check if next character is part of pattern */
110                if (!STRINGLIB_BLOOM(mask, s[i+m]))
111                    i = i + m;
112                else
113                    i = i + skip;
114            } else {
115                /* skip: check if next character is part of pattern */
116                if (!STRINGLIB_BLOOM(mask, s[i+m]))
117                    i = i + m;
118            }
119        }
120    } else {    /* FAST_RSEARCH */
121
122        /* create compressed boyer-moore delta 1 table */
123
124        /* process pattern[0] outside the loop */
125        STRINGLIB_BLOOM_ADD(mask, p[0]);
126        /* process pattern[:0:-1] */
127        for (i = mlast; i > 0; i--) {
128            STRINGLIB_BLOOM_ADD(mask, p[i]);
129            if (p[i] == p[0])
130                skip = i - 1;
131        }
132
133        for (i = w; i >= 0; i--) {
134            if (s[i] == p[0]) {
135                /* candidate match */
136                for (j = mlast; j > 0; j--)
137                    if (s[i+j] != p[j])
138                        break;
139                if (j == 0)
140                    /* got a match! */
141                    return i;
142                /* miss: check if previous character is part of pattern */
143                if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
144                    i = i - m;
145                else
146                    i = i - skip;
147            } else {
148                /* skip: check if previous character is part of pattern */
149                if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
150                    i = i - m;
151            }
152        }
153    }
154
155    if (mode != FAST_COUNT)
156        return -1;
157    return count;
158}
159
160#endif
161