regexp_benchmark.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
1// Copyright 2006-2008 The RE2 Authors.  All Rights Reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Benchmarks for regular expression implementations.
6
7#include "util/test.h"
8#include "re2/prog.h"
9#include "re2/re2.h"
10#include "re2/regexp.h"
11#include "util/pcre.h"
12#include "util/benchmark.h"
13
14namespace re2 {
15void Test();
16void MemoryUsage();
17}  // namespace re2
18
19typedef testing::MallocCounter MallocCounter;
20
21namespace re2 {
22
23void Test() {
24  Regexp* re = Regexp::Parse("(\\d+)-(\\d+)-(\\d+)", Regexp::LikePerl, NULL);
25  CHECK(re);
26  Prog* prog = re->CompileToProg(0);
27  CHECK(prog);
28  CHECK(prog->IsOnePass());
29  const char* text = "650-253-0001";
30  StringPiece sp[4];
31  CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
32  CHECK_EQ(sp[0], "650-253-0001");
33  CHECK_EQ(sp[1], "650");
34  CHECK_EQ(sp[2], "253");
35  CHECK_EQ(sp[3], "0001");
36  delete prog;
37  re->Decref();
38  LOG(INFO) << "test passed\n";
39}
40
41void MemoryUsage() {
42  const char* regexp = "(\\d+)-(\\d+)-(\\d+)";
43  const char* text = "650-253-0001";
44  {
45    MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
46    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
47    CHECK(re);
48    // Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly,
49    // because LOG(INFO) might do a big allocation before they get evaluated.
50    fprintf(stderr, "Regexp: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
51    mc.Reset();
52
53    Prog* prog = re->CompileToProg(0);
54    CHECK(prog);
55    CHECK(prog->IsOnePass());
56    fprintf(stderr, "Prog:   %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
57    mc.Reset();
58
59    StringPiece sp[4];
60    CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
61    fprintf(stderr, "Search: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
62    delete prog;
63    re->Decref();
64  }
65
66  {
67    MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
68
69    PCRE re(regexp, PCRE::UTF8);
70    fprintf(stderr, "RE:     %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
71    PCRE::FullMatch(text, re);
72    fprintf(stderr, "RE:     %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
73  }
74
75  {
76    MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
77
78    PCRE* re = new PCRE(regexp, PCRE::UTF8);
79    fprintf(stderr, "PCRE*:  %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
80    PCRE::FullMatch(text, *re);
81    fprintf(stderr, "PCRE*:  %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
82    delete re;
83  }
84
85  {
86    MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
87
88    RE2 re(regexp);
89    fprintf(stderr, "RE2:    %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
90    RE2::FullMatch(text, re);
91    fprintf(stderr, "RE2:    %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
92  }
93
94  fprintf(stderr, "sizeof: PCRE=%d RE2=%d Prog=%d Inst=%d\n",
95          static_cast<int>(sizeof(PCRE)),
96          static_cast<int>(sizeof(RE2)),
97          static_cast<int>(sizeof(Prog)),
98          static_cast<int>(sizeof(Prog::Inst)));
99}
100
101// Regular expression implementation wrappers.
102// Defined at bottom of file, but they are repetitive
103// and not interesting.
104
105typedef void SearchImpl(int iters, const char* regexp, const StringPiece& text,
106             Prog::Anchor anchor, bool expect_match);
107
108SearchImpl SearchDFA, SearchNFA, SearchOnePass, SearchBitState,
109           SearchPCRE, SearchRE2,
110           SearchCachedDFA, SearchCachedNFA, SearchCachedOnePass, SearchCachedBitState,
111           SearchCachedPCRE, SearchCachedRE2;
112
113typedef void ParseImpl(int iters, const char* regexp, const StringPiece& text);
114
115ParseImpl Parse1NFA, Parse1OnePass, Parse1BitState,
116          Parse1PCRE, Parse1RE2,
117          Parse1Backtrack,
118          Parse1CachedNFA, Parse1CachedOnePass, Parse1CachedBitState,
119          Parse1CachedPCRE, Parse1CachedRE2,
120          Parse1CachedBacktrack;
121
122ParseImpl Parse3NFA, Parse3OnePass, Parse3BitState,
123          Parse3PCRE, Parse3RE2,
124          Parse3Backtrack,
125          Parse3CachedNFA, Parse3CachedOnePass, Parse3CachedBitState,
126          Parse3CachedPCRE, Parse3CachedRE2,
127          Parse3CachedBacktrack;
128
129ParseImpl SearchParse2CachedPCRE, SearchParse2CachedRE2;
130
131ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2;
132
133// Benchmark: failed search for regexp in random text.
134
135// Generate random text that won't contain the search string,
136// to test worst-case search behavior.
137void MakeText(string* text, int nbytes) {
138  text->resize(nbytes);
139  srand(0);
140  for (int i = 0; i < nbytes; i++) {
141    if (!rand()%30)
142      (*text)[i] = '\n';
143    else
144      (*text)[i] = rand()%(0x7E + 1 - 0x20)+0x20;
145  }
146}
147
148// Makes text of size nbytes, then calls run to search
149// the text for regexp iters times.
150void Search(int iters, int nbytes, const char* regexp, SearchImpl* search) {
151  StopBenchmarkTiming();
152  string s;
153  MakeText(&s, nbytes);
154  BenchmarkMemoryUsage();
155  StartBenchmarkTiming();
156  search(iters, regexp, s, Prog::kUnanchored, false);
157  SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
158}
159
160// These two are easy because they start with an A,
161// giving the search loop something to memchr for.
162#define EASY0      "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
163#define EASY1      "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
164
165// This is a little harder, since it starts with a character class
166// and thus can't be memchr'ed.  Could look for ABC and work backward,
167// but no one does that.
168#define MEDIUM     "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
169
170// This is a fair amount harder, because of the leading [ -~]*.
171// A bad backtracking implementation will take O(text^2) time to
172// figure out there's no match.
173#define HARD       "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
174
175// This stresses engines that are trying to track parentheses.
176#define PARENS     "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" \
177                   "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
178
179void Search_Easy0_CachedDFA(int i, int n)     { Search(i, n, EASY0, SearchCachedDFA); }
180void Search_Easy0_CachedNFA(int i, int n)     { Search(i, n, EASY0, SearchCachedNFA); }
181void Search_Easy0_CachedPCRE(int i, int n)    { Search(i, n, EASY0, SearchCachedPCRE); }
182void Search_Easy0_CachedRE2(int i, int n)     { Search(i, n, EASY0, SearchCachedRE2); }
183
184BENCHMARK_RANGE(Search_Easy0_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
185BENCHMARK_RANGE(Search_Easy0_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
186#ifdef USEPCRE
187BENCHMARK_RANGE(Search_Easy0_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
188#endif
189BENCHMARK_RANGE(Search_Easy0_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
190
191void Search_Easy1_CachedDFA(int i, int n)     { Search(i, n, EASY1, SearchCachedDFA); }
192void Search_Easy1_CachedNFA(int i, int n)     { Search(i, n, EASY1, SearchCachedNFA); }
193void Search_Easy1_CachedPCRE(int i, int n)    { Search(i, n, EASY1, SearchCachedPCRE); }
194void Search_Easy1_CachedRE2(int i, int n)     { Search(i, n, EASY1, SearchCachedRE2); }
195
196BENCHMARK_RANGE(Search_Easy1_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
197BENCHMARK_RANGE(Search_Easy1_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
198#ifdef USEPCRE
199BENCHMARK_RANGE(Search_Easy1_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
200#endif
201BENCHMARK_RANGE(Search_Easy1_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
202
203void Search_Medium_CachedDFA(int i, int n)     { Search(i, n, MEDIUM, SearchCachedDFA); }
204void Search_Medium_CachedNFA(int i, int n)     { Search(i, n, MEDIUM, SearchCachedNFA); }
205void Search_Medium_CachedPCRE(int i, int n)    { Search(i, n, MEDIUM, SearchCachedPCRE); }
206void Search_Medium_CachedRE2(int i, int n)     { Search(i, n, MEDIUM, SearchCachedRE2); }
207
208BENCHMARK_RANGE(Search_Medium_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
209BENCHMARK_RANGE(Search_Medium_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
210#ifdef USEPCRE
211BENCHMARK_RANGE(Search_Medium_CachedPCRE,    8, 256<<10)->ThreadRange(1, NumCPUs());
212#endif
213BENCHMARK_RANGE(Search_Medium_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
214
215void Search_Hard_CachedDFA(int i, int n)     { Search(i, n, HARD, SearchCachedDFA); }
216void Search_Hard_CachedNFA(int i, int n)     { Search(i, n, HARD, SearchCachedNFA); }
217void Search_Hard_CachedPCRE(int i, int n)    { Search(i, n, HARD, SearchCachedPCRE); }
218void Search_Hard_CachedRE2(int i, int n)     { Search(i, n, HARD, SearchCachedRE2); }
219
220BENCHMARK_RANGE(Search_Hard_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
221BENCHMARK_RANGE(Search_Hard_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
222#ifdef USEPCRE
223BENCHMARK_RANGE(Search_Hard_CachedPCRE,    8, 4<<10)->ThreadRange(1, NumCPUs());
224#endif
225BENCHMARK_RANGE(Search_Hard_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
226
227void Search_Parens_CachedDFA(int i, int n)     { Search(i, n, PARENS, SearchCachedDFA); }
228void Search_Parens_CachedNFA(int i, int n)     { Search(i, n, PARENS, SearchCachedNFA); }
229void Search_Parens_CachedPCRE(int i, int n)    { Search(i, n, PARENS, SearchCachedPCRE); }
230void Search_Parens_CachedRE2(int i, int n)     { Search(i, n, PARENS, SearchCachedRE2); }
231
232BENCHMARK_RANGE(Search_Parens_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
233BENCHMARK_RANGE(Search_Parens_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
234#ifdef USEPCRE
235BENCHMARK_RANGE(Search_Parens_CachedPCRE,    8, 8)->ThreadRange(1, NumCPUs());
236#endif
237BENCHMARK_RANGE(Search_Parens_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
238
239void SearchBigFixed(int iters, int nbytes, SearchImpl* search) {
240  StopBenchmarkTiming();
241  string s;
242  s.append(nbytes/2, 'x');
243  string regexp = "^" + s + ".*$";
244  string t;
245  MakeText(&t, nbytes/2);
246  s += t;
247  BenchmarkMemoryUsage();
248  StartBenchmarkTiming();
249  search(iters, regexp.c_str(), s, Prog::kUnanchored, true);
250  SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
251}
252
253void Search_BigFixed_CachedDFA(int i, int n)     { SearchBigFixed(i, n, SearchCachedDFA); }
254void Search_BigFixed_CachedNFA(int i, int n)     { SearchBigFixed(i, n, SearchCachedNFA); }
255void Search_BigFixed_CachedPCRE(int i, int n)    { SearchBigFixed(i, n, SearchCachedPCRE); }
256void Search_BigFixed_CachedRE2(int i, int n)     { SearchBigFixed(i, n, SearchCachedRE2); }
257
258BENCHMARK_RANGE(Search_BigFixed_CachedDFA,     8, 1<<20)->ThreadRange(1, NumCPUs());
259BENCHMARK_RANGE(Search_BigFixed_CachedNFA,     8, 32<<10)->ThreadRange(1, NumCPUs());
260#ifdef USEPCRE
261BENCHMARK_RANGE(Search_BigFixed_CachedPCRE,    8, 32<<10)->ThreadRange(1, NumCPUs());
262#endif
263BENCHMARK_RANGE(Search_BigFixed_CachedRE2,     8, 1<<20)->ThreadRange(1, NumCPUs());
264
265// Benchmark: FindAndConsume
266void FindAndConsume(int iters, int nbytes) {
267  StopBenchmarkTiming();
268  string s;
269  MakeText(&s, nbytes);
270  s.append("Hello World");
271  StartBenchmarkTiming();
272  RE2 re("((Hello World))");
273  for (int i = 0; i < iters; i++) {
274    StringPiece t = s;
275    StringPiece u;
276    CHECK(RE2::FindAndConsume(&t, re, &u));
277    CHECK_EQ(u, "Hello World");
278  }
279  SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
280}
281
282BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs());
283
284// Benchmark: successful anchored search.
285
286void SearchSuccess(int iters, int nbytes, const char* regexp, SearchImpl* search) {
287  string s;
288  MakeText(&s, nbytes);
289  BenchmarkMemoryUsage();
290  search(iters, regexp, s, Prog::kAnchored, true);
291  SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
292}
293
294// Unambiguous search (RE2 can use OnePass).
295
296void Search_Success_DFA(int i, int n)     { SearchSuccess(i, n, ".*$", SearchDFA); }
297void Search_Success_OnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchOnePass); }
298void Search_Success_PCRE(int i, int n)    { SearchSuccess(i, n, ".*$", SearchPCRE); }
299void Search_Success_RE2(int i, int n)     { SearchSuccess(i, n, ".*$", SearchRE2); }
300
301BENCHMARK_RANGE(Search_Success_DFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
302#ifdef USEPCRE
303BENCHMARK_RANGE(Search_Success_PCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
304#endif
305BENCHMARK_RANGE(Search_Success_RE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
306BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
307
308void Search_Success_CachedDFA(int i, int n)     { SearchSuccess(i, n, ".*$", SearchCachedDFA); }
309void Search_Success_CachedOnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedOnePass); }
310void Search_Success_CachedPCRE(int i, int n)    { SearchSuccess(i, n, ".*$", SearchCachedPCRE); }
311void Search_Success_CachedRE2(int i, int n)     { SearchSuccess(i, n, ".*$", SearchCachedRE2); }
312
313BENCHMARK_RANGE(Search_Success_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
314#ifdef USEPCRE
315BENCHMARK_RANGE(Search_Success_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
316#endif
317BENCHMARK_RANGE(Search_Success_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
318BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
319
320// Ambiguous search (RE2 cannot use OnePass).
321
322void Search_Success1_DFA(int i, int n)     { SearchSuccess(i, n, ".*.$", SearchDFA); }
323void Search_Success1_PCRE(int i, int n)    { SearchSuccess(i, n, ".*.$", SearchPCRE); }
324void Search_Success1_RE2(int i, int n)     { SearchSuccess(i, n, ".*.$", SearchRE2); }
325void Search_Success1_BitState(int i, int n)     { SearchSuccess(i, n, ".*.$", SearchBitState); }
326
327BENCHMARK_RANGE(Search_Success1_DFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
328#ifdef USEPCRE
329BENCHMARK_RANGE(Search_Success1_PCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
330#endif
331BENCHMARK_RANGE(Search_Success1_RE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
332BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
333
334void Search_Success1_Cached_DFA(int i, int n)     { SearchSuccess(i, n, ".*.$", SearchCachedDFA); }
335void Search_Success1_Cached_PCRE(int i, int n)    { SearchSuccess(i, n, ".*.$", SearchCachedPCRE); }
336void Search_Success1_Cached_RE2(int i, int n)     { SearchSuccess(i, n, ".*.$", SearchCachedRE2); }
337
338BENCHMARK_RANGE(Search_Success1_Cached_DFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
339#ifdef USEPCRE
340BENCHMARK_RANGE(Search_Success1_Cached_PCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
341#endif
342BENCHMARK_RANGE(Search_Success1_Cached_RE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
343
344// Benchmark: use regexp to find phone number.
345
346void SearchDigits(int iters, SearchImpl* search) {
347  const char *text = "650-253-0001";
348  int len = strlen(text);
349  BenchmarkMemoryUsage();
350  search(iters, "([0-9]+)-([0-9]+)-([0-9]+)",
351         StringPiece(text, len), Prog::kAnchored, true);
352  SetBenchmarkItemsProcessed(iters);
353}
354
355void Search_Digits_DFA(int i)         { SearchDigits(i, SearchDFA); }
356void Search_Digits_NFA(int i)         { SearchDigits(i, SearchNFA); }
357void Search_Digits_OnePass(int i)     { SearchDigits(i, SearchOnePass); }
358void Search_Digits_PCRE(int i)        { SearchDigits(i, SearchPCRE); }
359void Search_Digits_RE2(int i)         { SearchDigits(i, SearchRE2); }
360void Search_Digits_BitState(int i)         { SearchDigits(i, SearchBitState); }
361
362BENCHMARK(Search_Digits_DFA)->ThreadRange(1, NumCPUs());
363BENCHMARK(Search_Digits_NFA)->ThreadRange(1, NumCPUs());
364BENCHMARK(Search_Digits_OnePass)->ThreadRange(1, NumCPUs());
365#ifdef USEPCRE
366BENCHMARK(Search_Digits_PCRE)->ThreadRange(1, NumCPUs());
367#endif
368BENCHMARK(Search_Digits_RE2)->ThreadRange(1, NumCPUs());
369BENCHMARK(Search_Digits_BitState)->ThreadRange(1, NumCPUs());
370
371// Benchmark: use regexp to parse digit fields in phone number.
372
373void Parse3Digits(int iters,
374               void (*parse3)(int, const char*, const StringPiece&)) {
375  BenchmarkMemoryUsage();
376  parse3(iters, "([0-9]+)-([0-9]+)-([0-9]+)", "650-253-0001");
377  SetBenchmarkItemsProcessed(iters);
378}
379
380void Parse_Digits_NFA(int i)         { Parse3Digits(i, Parse3NFA); }
381void Parse_Digits_OnePass(int i)     { Parse3Digits(i, Parse3OnePass); }
382void Parse_Digits_PCRE(int i)        { Parse3Digits(i, Parse3PCRE); }
383void Parse_Digits_RE2(int i)         { Parse3Digits(i, Parse3RE2); }
384void Parse_Digits_Backtrack(int i)   { Parse3Digits(i, Parse3Backtrack); }
385void Parse_Digits_BitState(int i)   { Parse3Digits(i, Parse3BitState); }
386
387BENCHMARK(Parse_Digits_NFA)->ThreadRange(1, NumCPUs());
388BENCHMARK(Parse_Digits_OnePass)->ThreadRange(1, NumCPUs());
389#ifdef USEPCRE
390BENCHMARK(Parse_Digits_PCRE)->ThreadRange(1, NumCPUs());
391#endif
392BENCHMARK(Parse_Digits_RE2)->ThreadRange(1, NumCPUs());
393BENCHMARK(Parse_Digits_Backtrack)->ThreadRange(1, NumCPUs());
394BENCHMARK(Parse_Digits_BitState)->ThreadRange(1, NumCPUs());
395
396void Parse_CachedDigits_NFA(int i)         { Parse3Digits(i, Parse3CachedNFA); }
397void Parse_CachedDigits_OnePass(int i)     { Parse3Digits(i, Parse3CachedOnePass); }
398void Parse_CachedDigits_PCRE(int i)        { Parse3Digits(i, Parse3CachedPCRE); }
399void Parse_CachedDigits_RE2(int i)         { Parse3Digits(i, Parse3CachedRE2); }
400void Parse_CachedDigits_Backtrack(int i)   { Parse3Digits(i, Parse3CachedBacktrack); }
401void Parse_CachedDigits_BitState(int i)   { Parse3Digits(i, Parse3CachedBitState); }
402
403BENCHMARK(Parse_CachedDigits_NFA)->ThreadRange(1, NumCPUs());
404BENCHMARK(Parse_CachedDigits_OnePass)->ThreadRange(1, NumCPUs());
405#ifdef USEPCRE
406BENCHMARK(Parse_CachedDigits_PCRE)->ThreadRange(1, NumCPUs());
407#endif
408BENCHMARK(Parse_CachedDigits_Backtrack)->ThreadRange(1, NumCPUs());
409BENCHMARK(Parse_CachedDigits_RE2)->ThreadRange(1, NumCPUs());
410BENCHMARK(Parse_CachedDigits_BitState)->ThreadRange(1, NumCPUs());
411
412void Parse3DigitDs(int iters,
413               void (*parse3)(int, const char*, const StringPiece&)) {
414  BenchmarkMemoryUsage();
415  parse3(iters, "(\\d+)-(\\d+)-(\\d+)", "650-253-0001");
416  SetBenchmarkItemsProcessed(iters);
417}
418
419void Parse_DigitDs_NFA(int i)         { Parse3DigitDs(i, Parse3NFA); }
420void Parse_DigitDs_OnePass(int i)     { Parse3DigitDs(i, Parse3OnePass); }
421void Parse_DigitDs_PCRE(int i)        { Parse3DigitDs(i, Parse3PCRE); }
422void Parse_DigitDs_RE2(int i)         { Parse3DigitDs(i, Parse3RE2); }
423void Parse_DigitDs_Backtrack(int i)   { Parse3DigitDs(i, Parse3CachedBacktrack); }
424void Parse_DigitDs_BitState(int i)   { Parse3DigitDs(i, Parse3CachedBitState); }
425
426BENCHMARK(Parse_DigitDs_NFA)->ThreadRange(1, NumCPUs());
427BENCHMARK(Parse_DigitDs_OnePass)->ThreadRange(1, NumCPUs());
428#ifdef USEPCRE
429BENCHMARK(Parse_DigitDs_PCRE)->ThreadRange(1, NumCPUs());
430#endif
431BENCHMARK(Parse_DigitDs_RE2)->ThreadRange(1, NumCPUs());
432BENCHMARK(Parse_DigitDs_Backtrack)->ThreadRange(1, NumCPUs());
433BENCHMARK(Parse_DigitDs_BitState)->ThreadRange(1, NumCPUs());
434
435void Parse_CachedDigitDs_NFA(int i)         { Parse3DigitDs(i, Parse3CachedNFA); }
436void Parse_CachedDigitDs_OnePass(int i)     { Parse3DigitDs(i, Parse3CachedOnePass); }
437void Parse_CachedDigitDs_PCRE(int i)        { Parse3DigitDs(i, Parse3CachedPCRE); }
438void Parse_CachedDigitDs_RE2(int i)         { Parse3DigitDs(i, Parse3CachedRE2); }
439void Parse_CachedDigitDs_Backtrack(int i)   { Parse3DigitDs(i, Parse3CachedBacktrack); }
440void Parse_CachedDigitDs_BitState(int i)   { Parse3DigitDs(i, Parse3CachedBitState); }
441
442BENCHMARK(Parse_CachedDigitDs_NFA)->ThreadRange(1, NumCPUs());
443BENCHMARK(Parse_CachedDigitDs_OnePass)->ThreadRange(1, NumCPUs());
444#ifdef USEPCRE
445BENCHMARK(Parse_CachedDigitDs_PCRE)->ThreadRange(1, NumCPUs());
446#endif
447BENCHMARK(Parse_CachedDigitDs_Backtrack)->ThreadRange(1, NumCPUs());
448BENCHMARK(Parse_CachedDigitDs_RE2)->ThreadRange(1, NumCPUs());
449BENCHMARK(Parse_CachedDigitDs_BitState)->ThreadRange(1, NumCPUs());
450
451// Benchmark: splitting off leading number field.
452
453void Parse1Split(int iters,
454              void (*parse1)(int, const char*, const StringPiece&)) {
455  BenchmarkMemoryUsage();
456  parse1(iters, "[0-9]+-(.*)", "650-253-0001");
457  SetBenchmarkItemsProcessed(iters);
458}
459
460void Parse_Split_NFA(int i)         { Parse1Split(i, Parse1NFA); }
461void Parse_Split_OnePass(int i)     { Parse1Split(i, Parse1OnePass); }
462void Parse_Split_PCRE(int i)        { Parse1Split(i, Parse1PCRE); }
463void Parse_Split_RE2(int i)         { Parse1Split(i, Parse1RE2); }
464void Parse_Split_BitState(int i)         { Parse1Split(i, Parse1BitState); }
465
466BENCHMARK(Parse_Split_NFA)->ThreadRange(1, NumCPUs());
467BENCHMARK(Parse_Split_OnePass)->ThreadRange(1, NumCPUs());
468#ifdef USEPCRE
469BENCHMARK(Parse_Split_PCRE)->ThreadRange(1, NumCPUs());
470#endif
471BENCHMARK(Parse_Split_RE2)->ThreadRange(1, NumCPUs());
472BENCHMARK(Parse_Split_BitState)->ThreadRange(1, NumCPUs());
473
474void Parse_CachedSplit_NFA(int i)         { Parse1Split(i, Parse1CachedNFA); }
475void Parse_CachedSplit_OnePass(int i)     { Parse1Split(i, Parse1CachedOnePass); }
476void Parse_CachedSplit_PCRE(int i)        { Parse1Split(i, Parse1CachedPCRE); }
477void Parse_CachedSplit_RE2(int i)         { Parse1Split(i, Parse1CachedRE2); }
478void Parse_CachedSplit_BitState(int i)         { Parse1Split(i, Parse1CachedBitState); }
479
480BENCHMARK(Parse_CachedSplit_NFA)->ThreadRange(1, NumCPUs());
481BENCHMARK(Parse_CachedSplit_OnePass)->ThreadRange(1, NumCPUs());
482#ifdef USEPCRE
483BENCHMARK(Parse_CachedSplit_PCRE)->ThreadRange(1, NumCPUs());
484#endif
485BENCHMARK(Parse_CachedSplit_RE2)->ThreadRange(1, NumCPUs());
486BENCHMARK(Parse_CachedSplit_BitState)->ThreadRange(1, NumCPUs());
487
488// Benchmark: splitting off leading number field but harder (ambiguous regexp).
489
490void Parse1SplitHard(int iters,
491                  void (*run)(int, const char*, const StringPiece&)) {
492  BenchmarkMemoryUsage();
493  run(iters, "[0-9]+.(.*)", "650-253-0001");
494  SetBenchmarkItemsProcessed(iters);
495}
496
497void Parse_SplitHard_NFA(int i)         { Parse1SplitHard(i, Parse1NFA); }
498void Parse_SplitHard_PCRE(int i)        { Parse1SplitHard(i, Parse1PCRE); }
499void Parse_SplitHard_RE2(int i)         { Parse1SplitHard(i, Parse1RE2); }
500void Parse_SplitHard_BitState(int i)         { Parse1SplitHard(i, Parse1BitState); }
501
502#ifdef USEPCRE
503BENCHMARK(Parse_SplitHard_PCRE)->ThreadRange(1, NumCPUs());
504#endif
505BENCHMARK(Parse_SplitHard_RE2)->ThreadRange(1, NumCPUs());
506BENCHMARK(Parse_SplitHard_BitState)->ThreadRange(1, NumCPUs());
507BENCHMARK(Parse_SplitHard_NFA)->ThreadRange(1, NumCPUs());
508
509void Parse_CachedSplitHard_NFA(int i)       { Parse1SplitHard(i, Parse1CachedNFA); }
510void Parse_CachedSplitHard_PCRE(int i)      { Parse1SplitHard(i, Parse1CachedPCRE); }
511void Parse_CachedSplitHard_RE2(int i)       { Parse1SplitHard(i, Parse1CachedRE2); }
512void Parse_CachedSplitHard_BitState(int i)       { Parse1SplitHard(i, Parse1CachedBitState); }
513void Parse_CachedSplitHard_Backtrack(int i)       { Parse1SplitHard(i, Parse1CachedBacktrack); }
514
515#ifdef USEPCRE
516BENCHMARK(Parse_CachedSplitHard_PCRE)->ThreadRange(1, NumCPUs());
517#endif
518BENCHMARK(Parse_CachedSplitHard_RE2)->ThreadRange(1, NumCPUs());
519BENCHMARK(Parse_CachedSplitHard_BitState)->ThreadRange(1, NumCPUs());
520BENCHMARK(Parse_CachedSplitHard_NFA)->ThreadRange(1, NumCPUs());
521BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs());
522
523// Benchmark: Parse1SplitHard, big text, small match.
524
525void Parse1SplitBig1(int iters,
526                  void (*run)(int, const char*, const StringPiece&)) {
527  string s;
528  s.append(100000, 'x');
529  s.append("650-253-0001");
530  BenchmarkMemoryUsage();
531  run(iters, "[0-9]+.(.*)", s);
532  SetBenchmarkItemsProcessed(iters);
533}
534
535void Parse_CachedSplitBig1_PCRE(int i)      { Parse1SplitBig1(i, SearchParse1CachedPCRE); }
536void Parse_CachedSplitBig1_RE2(int i)       { Parse1SplitBig1(i, SearchParse1CachedRE2); }
537
538#ifdef USEPCRE
539BENCHMARK(Parse_CachedSplitBig1_PCRE)->ThreadRange(1, NumCPUs());
540#endif
541BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs());
542
543// Benchmark: Parse1SplitHard, big text, big match.
544
545void Parse1SplitBig2(int iters,
546                  void (*run)(int, const char*, const StringPiece&)) {
547  string s;
548  s.append("650-253-");
549  s.append(100000, '0');
550  BenchmarkMemoryUsage();
551  run(iters, "[0-9]+.(.*)", s);
552  SetBenchmarkItemsProcessed(iters);
553}
554
555void Parse_CachedSplitBig2_PCRE(int i)      { Parse1SplitBig2(i, SearchParse1CachedPCRE); }
556void Parse_CachedSplitBig2_RE2(int i)       { Parse1SplitBig2(i, SearchParse1CachedRE2); }
557
558#ifdef USEPCRE
559BENCHMARK(Parse_CachedSplitBig2_PCRE)->ThreadRange(1, NumCPUs());
560#endif
561BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs());
562
563// Benchmark: measure time required to parse (but not execute)
564// a simple regular expression.
565
566void ParseRegexp(int iters, const string& regexp) {
567  for (int i = 0; i < iters; i++) {
568    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
569    CHECK(re);
570    re->Decref();
571  }
572}
573
574void SimplifyRegexp(int iters, const string& regexp) {
575  for (int i = 0; i < iters; i++) {
576    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
577    CHECK(re);
578    Regexp* sre = re->Simplify();
579    CHECK(sre);
580    sre->Decref();
581    re->Decref();
582  }
583}
584
585void NullWalkRegexp(int iters, const string& regexp) {
586  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
587  CHECK(re);
588  for (int i = 0; i < iters; i++) {
589    re->NullWalk();
590  }
591  re->Decref();
592}
593
594void SimplifyCompileRegexp(int iters, const string& regexp) {
595  for (int i = 0; i < iters; i++) {
596    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
597    CHECK(re);
598    Regexp* sre = re->Simplify();
599    CHECK(sre);
600    Prog* prog = sre->CompileToProg(0);
601    CHECK(prog);
602    delete prog;
603    sre->Decref();
604    re->Decref();
605  }
606}
607
608void CompileRegexp(int iters, const string& regexp) {
609  for (int i = 0; i < iters; i++) {
610    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
611    CHECK(re);
612    Prog* prog = re->CompileToProg(0);
613    CHECK(prog);
614    delete prog;
615    re->Decref();
616  }
617}
618
619void CompileToProg(int iters, const string& regexp) {
620  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
621  CHECK(re);
622  for (int i = 0; i < iters; i++) {
623    Prog* prog = re->CompileToProg(0);
624    CHECK(prog);
625    delete prog;
626  }
627  re->Decref();
628}
629
630void CompileByteMap(int iters, const string& regexp) {
631  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
632  CHECK(re);
633  Prog* prog = re->CompileToProg(0);
634  CHECK(prog);
635  for (int i = 0; i < iters; i++) {
636    prog->ComputeByteMap();
637  }
638  delete prog;
639  re->Decref();
640}
641
642void CompilePCRE(int iters, const string& regexp) {
643  for (int i = 0; i < iters; i++) {
644    PCRE re(regexp, PCRE::UTF8);
645    CHECK_EQ(re.error(), "");
646  }
647}
648
649void CompileRE2(int iters, const string& regexp) {
650  for (int i = 0; i < iters; i++) {
651    RE2 re(regexp);
652    CHECK_EQ(re.error(), "");
653  }
654}
655
656void RunBuild(int iters, const string& regexp, void (*run)(int, const string&)) {
657  run(iters, regexp);
658  SetBenchmarkItemsProcessed(iters);
659}
660
661}  // namespace re2
662
663DEFINE_string(compile_regexp, "(.*)-(\\d+)-of-(\\d+)", "regexp for compile benchmarks");
664
665namespace re2 {
666
667void BM_PCRE_Compile(int i)      { RunBuild(i, FLAGS_compile_regexp, CompilePCRE); }
668void BM_Regexp_Parse(int i)      { RunBuild(i, FLAGS_compile_regexp, ParseRegexp); }
669void BM_Regexp_Simplify(int i)   { RunBuild(i, FLAGS_compile_regexp, SimplifyRegexp); }
670void BM_CompileToProg(int i)     { RunBuild(i, FLAGS_compile_regexp, CompileToProg); }
671void BM_CompileByteMap(int i)     { RunBuild(i, FLAGS_compile_regexp, CompileByteMap); }
672void BM_Regexp_Compile(int i)    { RunBuild(i, FLAGS_compile_regexp, CompileRegexp); }
673void BM_Regexp_SimplifyCompile(int i)   { RunBuild(i, FLAGS_compile_regexp, SimplifyCompileRegexp); }
674void BM_Regexp_NullWalk(int i)   { RunBuild(i, FLAGS_compile_regexp, NullWalkRegexp); }
675void BM_RE2_Compile(int i)       { RunBuild(i, FLAGS_compile_regexp, CompileRE2); }
676
677#ifdef USEPCRE
678BENCHMARK(BM_PCRE_Compile)->ThreadRange(1, NumCPUs());
679#endif
680BENCHMARK(BM_Regexp_Parse)->ThreadRange(1, NumCPUs());
681BENCHMARK(BM_Regexp_Simplify)->ThreadRange(1, NumCPUs());
682BENCHMARK(BM_CompileToProg)->ThreadRange(1, NumCPUs());
683BENCHMARK(BM_CompileByteMap)->ThreadRange(1, NumCPUs());
684BENCHMARK(BM_Regexp_Compile)->ThreadRange(1, NumCPUs());
685BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs());
686BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs());
687BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs());
688
689
690// Makes text of size nbytes, then calls run to search
691// the text for regexp iters times.
692void SearchPhone(int iters, int nbytes, ParseImpl* search) {
693  StopBenchmarkTiming();
694  string s;
695  MakeText(&s, nbytes);
696  s.append("(650) 253-0001");
697  BenchmarkMemoryUsage();
698  StartBenchmarkTiming();
699  search(iters, "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})", s);
700  SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
701}
702
703void SearchPhone_CachedPCRE(int i, int n) {
704  SearchPhone(i, n, SearchParse2CachedPCRE);
705}
706void SearchPhone_CachedRE2(int i, int n) {
707  SearchPhone(i, n, SearchParse2CachedRE2);
708}
709
710#ifdef USEPCRE
711BENCHMARK_RANGE(SearchPhone_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
712#endif
713BENCHMARK_RANGE(SearchPhone_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
714
715/*
716TODO(rsc): Make this work again.
717
718// Generates and returns a string over binary alphabet {0,1} that contains
719// all possible binary sequences of length n as subsequences.  The obvious
720// brute force method would generate a string of length n * 2^n, but this
721// generates a string of length n + 2^n - 1 called a De Bruijn cycle.
722// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
723static string DeBruijnString(int n) {
724  CHECK_LT(n, 8*sizeof(int));
725  CHECK_GT(n, 0);
726
727  vector<bool> did(1<<n);
728  for (int i = 0; i < 1<<n; i++)
729    did[i] = false;
730
731  string s;
732  for (int i = 0; i < n-1; i++)
733    s.append("0");
734  int bits = 0;
735  int mask = (1<<n) - 1;
736  for (int i = 0; i < (1<<n); i++) {
737    bits <<= 1;
738    bits &= mask;
739    if (!did[bits|1]) {
740      bits |= 1;
741      s.append("1");
742    } else {
743      s.append("0");
744    }
745    CHECK(!did[bits]);
746    did[bits] = true;
747  }
748  return s;
749}
750
751void CacheFill(int iters, int n, SearchImpl *srch) {
752  string s = DeBruijnString(n+1);
753  string t;
754  for (int i = n+1; i < 20; i++) {
755    t = s + s;
756    swap(s, t);
757  }
758  srch(iters, StringPrintf("0[01]{%d}$", n).c_str(), s,
759       Prog::kUnanchored, true);
760  SetBenchmarkBytesProcessed(static_cast<int64>(iters)*s.size());
761}
762
763void CacheFillPCRE(int i, int n) { CacheFill(i, n, SearchCachedPCRE); }
764void CacheFillRE2(int i, int n)  { CacheFill(i, n, SearchCachedRE2); }
765void CacheFillNFA(int i, int n)  { CacheFill(i, n, SearchCachedNFA); }
766void CacheFillDFA(int i, int n)  { CacheFill(i, n, SearchCachedDFA); }
767
768// BENCHMARK_WITH_ARG uses __LINE__ to generate distinct identifiers
769// for the static BenchmarkRegisterer, which makes it unusable inside
770// a macro like DO24 below.  MY_BENCHMARK_WITH_ARG uses the argument a
771// to make the identifiers distinct (only possible when 'a' is a simple
772// expression like 2, not like 1+1).
773#define MY_BENCHMARK_WITH_ARG(n, a) \
774  bool __benchmark_ ## n ## a =     \
775    (new ::testing::Benchmark(#n, NewPermanentCallback(&n)))->ThreadRange(1, NumCPUs());
776
777#define DO24(A, B) \
778  A(B, 1);    A(B, 2);    A(B, 3);    A(B, 4);    A(B, 5);    A(B, 6);  \
779  A(B, 7);    A(B, 8);    A(B, 9);    A(B, 10);   A(B, 11);   A(B, 12); \
780  A(B, 13);   A(B, 14);   A(B, 15);   A(B, 16);   A(B, 17);   A(B, 18); \
781  A(B, 19);   A(B, 20);   A(B, 21);   A(B, 22);   A(B, 23);   A(B, 24);
782
783DO24(MY_BENCHMARK_WITH_ARG, CacheFillPCRE)
784DO24(MY_BENCHMARK_WITH_ARG, CacheFillNFA)
785DO24(MY_BENCHMARK_WITH_ARG, CacheFillRE2)
786DO24(MY_BENCHMARK_WITH_ARG, CacheFillDFA)
787
788#undef DO24
789#undef MY_BENCHMARK_WITH_ARG
790*/
791
792////////////////////////////////////////////////////////////////////////
793//
794// Implementation routines.  Sad that there are so many,
795// but all the interfaces are slightly different.
796
797// Runs implementation to search for regexp in text, iters times.
798// Expect_match says whether the regexp should be found.
799// Anchored says whether to run an anchored search.
800
801void SearchDFA(int iters, const char* regexp, const StringPiece& text,
802            Prog::Anchor anchor, bool expect_match) {
803  for (int i = 0; i < iters; i++) {
804    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
805    CHECK(re);
806    Prog* prog = re->CompileToProg(0);
807    CHECK(prog);
808    bool failed = false;
809    CHECK_EQ(prog->SearchDFA(text, NULL, anchor, Prog::kFirstMatch,
810                             NULL, &failed, NULL),
811             expect_match);
812    CHECK(!failed);
813    delete prog;
814    re->Decref();
815  }
816}
817
818void SearchNFA(int iters, const char* regexp, const StringPiece& text,
819            Prog::Anchor anchor, bool expect_match) {
820  for (int i = 0; i < iters; i++) {
821    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
822    CHECK(re);
823    Prog* prog = re->CompileToProg(0);
824    CHECK(prog);
825    CHECK_EQ(prog->SearchNFA(text, NULL, anchor, Prog::kFirstMatch, NULL, 0),
826             expect_match);
827    delete prog;
828    re->Decref();
829  }
830}
831
832void SearchOnePass(int iters, const char* regexp, const StringPiece& text,
833            Prog::Anchor anchor, bool expect_match) {
834  for (int i = 0; i < iters; i++) {
835    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
836    CHECK(re);
837    Prog* prog = re->CompileToProg(0);
838    CHECK(prog);
839    CHECK(prog->IsOnePass());
840    CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
841             expect_match);
842    delete prog;
843    re->Decref();
844  }
845}
846
847void SearchBitState(int iters, const char* regexp, const StringPiece& text,
848            Prog::Anchor anchor, bool expect_match) {
849  for (int i = 0; i < iters; i++) {
850    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
851    CHECK(re);
852    Prog* prog = re->CompileToProg(0);
853    CHECK(prog);
854    CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
855             expect_match);
856    delete prog;
857    re->Decref();
858  }
859}
860
861void SearchPCRE(int iters, const char* regexp, const StringPiece& text,
862                Prog::Anchor anchor, bool expect_match) {
863  for (int i = 0; i < iters; i++) {
864    PCRE re(regexp, PCRE::UTF8);
865    CHECK_EQ(re.error(), "");
866    if (anchor == Prog::kAnchored)
867      CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
868    else
869      CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
870  }
871}
872
873void SearchRE2(int iters, const char* regexp, const StringPiece& text,
874               Prog::Anchor anchor, bool expect_match) {
875  for (int i = 0; i < iters; i++) {
876    RE2 re(regexp);
877    CHECK_EQ(re.error(), "");
878    if (anchor == Prog::kAnchored)
879      CHECK_EQ(RE2::FullMatch(text, re), expect_match);
880    else
881      CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
882  }
883}
884
885// SearchCachedXXX is like SearchXXX but only does the
886// regexp parsing and compiling once.  This lets us measure
887// search time without the per-regexp overhead.
888
889void SearchCachedDFA(int iters, const char* regexp, const StringPiece& text,
890                     Prog::Anchor anchor, bool expect_match) {
891  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
892  CHECK(re);
893  Prog* prog = re->CompileToProg(1LL<<31);
894  CHECK(prog);
895  for (int i = 0; i < iters; i++) {
896    bool failed = false;
897    CHECK_EQ(prog->SearchDFA(text, NULL, anchor,
898                             Prog::kFirstMatch, NULL, &failed, NULL),
899             expect_match);
900    CHECK(!failed);
901  }
902  delete prog;
903  re->Decref();
904}
905
906void SearchCachedNFA(int iters, const char* regexp, const StringPiece& text,
907                     Prog::Anchor anchor, bool expect_match) {
908  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
909  CHECK(re);
910  Prog* prog = re->CompileToProg(0);
911  CHECK(prog);
912  for (int i = 0; i < iters; i++) {
913    CHECK_EQ(prog->SearchNFA(text, NULL, anchor, Prog::kFirstMatch, NULL, 0),
914             expect_match);
915  }
916  delete prog;
917  re->Decref();
918}
919
920void SearchCachedOnePass(int iters, const char* regexp, const StringPiece& text,
921                     Prog::Anchor anchor, bool expect_match) {
922  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
923  CHECK(re);
924  Prog* prog = re->CompileToProg(0);
925  CHECK(prog);
926  CHECK(prog->IsOnePass());
927  for (int i = 0; i < iters; i++)
928    CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
929             expect_match);
930  delete prog;
931  re->Decref();
932}
933
934void SearchCachedBitState(int iters, const char* regexp, const StringPiece& text,
935                     Prog::Anchor anchor, bool expect_match) {
936  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
937  CHECK(re);
938  Prog* prog = re->CompileToProg(0);
939  CHECK(prog);
940  for (int i = 0; i < iters; i++)
941    CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
942             expect_match);
943  delete prog;
944  re->Decref();
945}
946
947void SearchCachedPCRE(int iters, const char* regexp, const StringPiece& text,
948                     Prog::Anchor anchor, bool expect_match) {
949  PCRE re(regexp, PCRE::UTF8);
950  CHECK_EQ(re.error(), "");
951  for (int i = 0; i < iters; i++) {
952    if (anchor == Prog::kAnchored)
953      CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
954    else
955      CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
956  }
957}
958
959void SearchCachedRE2(int iters, const char* regexp, const StringPiece& text,
960                     Prog::Anchor anchor, bool expect_match) {
961  RE2 re(regexp);
962  CHECK_EQ(re.error(), "");
963  for (int i = 0; i < iters; i++) {
964    if (anchor == Prog::kAnchored)
965      CHECK_EQ(RE2::FullMatch(text, re), expect_match);
966    else
967      CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
968  }
969}
970
971
972// Runs implementation to full match regexp against text,
973// extracting three submatches.  Expects match always.
974
975void Parse3NFA(int iters, const char* regexp, const StringPiece& text) {
976  for (int i = 0; i < iters; i++) {
977    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
978    CHECK(re);
979    Prog* prog = re->CompileToProg(0);
980    CHECK(prog);
981    StringPiece sp[4];  // 4 because sp[0] is whole match.
982    CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 4));
983    delete prog;
984    re->Decref();
985  }
986}
987
988void Parse3OnePass(int iters, const char* regexp, const StringPiece& text) {
989  for (int i = 0; i < iters; i++) {
990    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
991    CHECK(re);
992    Prog* prog = re->CompileToProg(0);
993    CHECK(prog);
994    CHECK(prog->IsOnePass());
995    StringPiece sp[4];  // 4 because sp[0] is whole match.
996    CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
997    delete prog;
998    re->Decref();
999  }
1000}
1001
1002void Parse3BitState(int iters, const char* regexp, const StringPiece& text) {
1003  for (int i = 0; i < iters; i++) {
1004    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1005    CHECK(re);
1006    Prog* prog = re->CompileToProg(0);
1007    CHECK(prog);
1008    StringPiece sp[4];  // 4 because sp[0] is whole match.
1009    CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1010    delete prog;
1011    re->Decref();
1012  }
1013}
1014
1015void Parse3Backtrack(int iters, const char* regexp, const StringPiece& text) {
1016  for (int i = 0; i < iters; i++) {
1017    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1018    CHECK(re);
1019    Prog* prog = re->CompileToProg(0);
1020    CHECK(prog);
1021    StringPiece sp[4];  // 4 because sp[0] is whole match.
1022    CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1023    delete prog;
1024    re->Decref();
1025  }
1026}
1027
1028void Parse3PCRE(int iters, const char* regexp, const StringPiece& text) {
1029  for (int i = 0; i < iters; i++) {
1030    PCRE re(regexp, PCRE::UTF8);
1031    CHECK_EQ(re.error(), "");
1032    StringPiece sp1, sp2, sp3;
1033    CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1034  }
1035}
1036
1037void Parse3RE2(int iters, const char* regexp, const StringPiece& text) {
1038  for (int i = 0; i < iters; i++) {
1039    RE2 re(regexp);
1040    CHECK_EQ(re.error(), "");
1041    StringPiece sp1, sp2, sp3;
1042    CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1043  }
1044}
1045
1046void Parse3CachedNFA(int iters, const char* regexp, const StringPiece& text) {
1047  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1048  CHECK(re);
1049  Prog* prog = re->CompileToProg(0);
1050  CHECK(prog);
1051  StringPiece sp[4];  // 4 because sp[0] is whole match.
1052  for (int i = 0; i < iters; i++) {
1053    CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1054  }
1055  delete prog;
1056  re->Decref();
1057}
1058
1059void Parse3CachedOnePass(int iters, const char* regexp, const StringPiece& text) {
1060  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1061  CHECK(re);
1062  Prog* prog = re->CompileToProg(0);
1063  CHECK(prog);
1064  CHECK(prog->IsOnePass());
1065  StringPiece sp[4];  // 4 because sp[0] is whole match.
1066  for (int i = 0; i < iters; i++)
1067    CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1068  delete prog;
1069  re->Decref();
1070}
1071
1072void Parse3CachedBitState(int iters, const char* regexp, const StringPiece& text) {
1073  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1074  CHECK(re);
1075  Prog* prog = re->CompileToProg(0);
1076  CHECK(prog);
1077  StringPiece sp[4];  // 4 because sp[0] is whole match.
1078  for (int i = 0; i < iters; i++)
1079    CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1080  delete prog;
1081  re->Decref();
1082}
1083
1084void Parse3CachedBacktrack(int iters, const char* regexp, const StringPiece& text) {
1085  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1086  CHECK(re);
1087  Prog* prog = re->CompileToProg(0);
1088  CHECK(prog);
1089  StringPiece sp[4];  // 4 because sp[0] is whole match.
1090  for (int i = 0; i < iters; i++)
1091    CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1092  delete prog;
1093  re->Decref();
1094}
1095
1096void Parse3CachedPCRE(int iters, const char* regexp, const StringPiece& text) {
1097  PCRE re(regexp, PCRE::UTF8);
1098  CHECK_EQ(re.error(), "");
1099  StringPiece sp1, sp2, sp3;
1100  for (int i = 0; i < iters; i++) {
1101    CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1102  }
1103}
1104
1105void Parse3CachedRE2(int iters, const char* regexp, const StringPiece& text) {
1106  RE2 re(regexp);
1107  CHECK_EQ(re.error(), "");
1108  StringPiece sp1, sp2, sp3;
1109  for (int i = 0; i < iters; i++) {
1110    CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1111  }
1112}
1113
1114
1115// Runs implementation to full match regexp against text,
1116// extracting three submatches.  Expects match always.
1117
1118void Parse1NFA(int iters, const char* regexp, const StringPiece& text) {
1119  for (int i = 0; i < iters; i++) {
1120    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1121    CHECK(re);
1122    Prog* prog = re->CompileToProg(0);
1123    CHECK(prog);
1124    StringPiece sp[2];  // 2 because sp[0] is whole match.
1125    CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1126    delete prog;
1127    re->Decref();
1128  }
1129}
1130
1131void Parse1OnePass(int iters, const char* regexp, const StringPiece& text) {
1132  for (int i = 0; i < iters; i++) {
1133    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1134    CHECK(re);
1135    Prog* prog = re->CompileToProg(0);
1136    CHECK(prog);
1137    CHECK(prog->IsOnePass());
1138    StringPiece sp[2];  // 2 because sp[0] is whole match.
1139    CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1140    delete prog;
1141    re->Decref();
1142  }
1143}
1144
1145void Parse1BitState(int iters, const char* regexp, const StringPiece& text) {
1146  for (int i = 0; i < iters; i++) {
1147    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1148    CHECK(re);
1149    Prog* prog = re->CompileToProg(0);
1150    CHECK(prog);
1151    StringPiece sp[2];  // 2 because sp[0] is whole match.
1152    CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1153    delete prog;
1154    re->Decref();
1155  }
1156}
1157
1158void Parse1PCRE(int iters, const char* regexp, const StringPiece& text) {
1159  for (int i = 0; i < iters; i++) {
1160    PCRE re(regexp, PCRE::UTF8);
1161    CHECK_EQ(re.error(), "");
1162    StringPiece sp1;
1163    CHECK(PCRE::FullMatch(text, re, &sp1));
1164  }
1165}
1166
1167void Parse1RE2(int iters, const char* regexp, const StringPiece& text) {
1168  for (int i = 0; i < iters; i++) {
1169    RE2 re(regexp);
1170    CHECK_EQ(re.error(), "");
1171    StringPiece sp1;
1172    CHECK(RE2::FullMatch(text, re, &sp1));
1173  }
1174}
1175
1176void Parse1CachedNFA(int iters, const char* regexp, const StringPiece& text) {
1177  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1178  CHECK(re);
1179  Prog* prog = re->CompileToProg(0);
1180  CHECK(prog);
1181  StringPiece sp[2];  // 2 because sp[0] is whole match.
1182  for (int i = 0; i < iters; i++) {
1183    CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1184  }
1185  delete prog;
1186  re->Decref();
1187}
1188
1189void Parse1CachedOnePass(int iters, const char* regexp, const StringPiece& text) {
1190  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1191  CHECK(re);
1192  Prog* prog = re->CompileToProg(0);
1193  CHECK(prog);
1194  CHECK(prog->IsOnePass());
1195  StringPiece sp[2];  // 2 because sp[0] is whole match.
1196  for (int i = 0; i < iters; i++)
1197    CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1198  delete prog;
1199  re->Decref();
1200}
1201
1202void Parse1CachedBitState(int iters, const char* regexp, const StringPiece& text) {
1203  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1204  CHECK(re);
1205  Prog* prog = re->CompileToProg(0);
1206  CHECK(prog);
1207  StringPiece sp[2];  // 2 because sp[0] is whole match.
1208  for (int i = 0; i < iters; i++)
1209    CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1210  delete prog;
1211  re->Decref();
1212}
1213
1214void Parse1CachedBacktrack(int iters, const char* regexp, const StringPiece& text) {
1215  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1216  CHECK(re);
1217  Prog* prog = re->CompileToProg(0);
1218  CHECK(prog);
1219  StringPiece sp[2];  // 2 because sp[0] is whole match.
1220  for (int i = 0; i < iters; i++)
1221    CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1222  delete prog;
1223  re->Decref();
1224}
1225
1226void Parse1CachedPCRE(int iters, const char* regexp, const StringPiece& text) {
1227  PCRE re(regexp, PCRE::UTF8);
1228  CHECK_EQ(re.error(), "");
1229  StringPiece sp1;
1230  for (int i = 0; i < iters; i++) {
1231    CHECK(PCRE::FullMatch(text, re, &sp1));
1232  }
1233}
1234
1235void Parse1CachedRE2(int iters, const char* regexp, const StringPiece& text) {
1236  RE2 re(regexp);
1237  CHECK_EQ(re.error(), "");
1238  StringPiece sp1;
1239  for (int i = 0; i < iters; i++) {
1240    CHECK(RE2::FullMatch(text, re, &sp1));
1241  }
1242}
1243
1244void SearchParse2CachedPCRE(int iters, const char* regexp,
1245                            const StringPiece& text) {
1246  PCRE re(regexp, PCRE::UTF8);
1247  CHECK_EQ(re.error(), "");
1248  for (int i = 0; i < iters; i++) {
1249    StringPiece sp1, sp2;
1250    CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2));
1251  }
1252}
1253
1254void SearchParse2CachedRE2(int iters, const char* regexp,
1255                           const StringPiece& text) {
1256  RE2 re(regexp);
1257  CHECK_EQ(re.error(), "");
1258  for (int i = 0; i < iters; i++) {
1259    StringPiece sp1, sp2;
1260    CHECK(RE2::PartialMatch(text, re, &sp1, &sp2));
1261  }
1262}
1263
1264void SearchParse1CachedPCRE(int iters, const char* regexp,
1265                            const StringPiece& text) {
1266  PCRE re(regexp, PCRE::UTF8);
1267  CHECK_EQ(re.error(), "");
1268  for (int i = 0; i < iters; i++) {
1269    StringPiece sp1;
1270    CHECK(PCRE::PartialMatch(text, re, &sp1));
1271  }
1272}
1273
1274void SearchParse1CachedRE2(int iters, const char* regexp,
1275                           const StringPiece& text) {
1276  RE2 re(regexp);
1277  CHECK_EQ(re.error(), "");
1278  for (int i = 0; i < iters; i++) {
1279    StringPiece sp1;
1280    CHECK(RE2::PartialMatch(text, re, &sp1));
1281  }
1282}
1283
1284void EmptyPartialMatchPCRE(int n) {
1285  PCRE re("");
1286  for (int i = 0; i < n; i++) {
1287    PCRE::PartialMatch("", re);
1288  }
1289}
1290
1291void EmptyPartialMatchRE2(int n) {
1292  RE2 re("");
1293  for (int i = 0; i < n; i++) {
1294    RE2::PartialMatch("", re);
1295  }
1296}
1297#ifdef USEPCRE
1298BENCHMARK(EmptyPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1299#endif
1300BENCHMARK(EmptyPartialMatchRE2)->ThreadRange(1, NumCPUs());
1301
1302void SimplePartialMatchPCRE(int n) {
1303  PCRE re("abcdefg");
1304  for (int i = 0; i < n; i++) {
1305    PCRE::PartialMatch("abcdefg", re);
1306  }
1307}
1308
1309void SimplePartialMatchRE2(int n) {
1310  RE2 re("abcdefg");
1311  for (int i = 0; i < n; i++) {
1312    RE2::PartialMatch("abcdefg", re);
1313  }
1314}
1315#ifdef USEPCRE
1316BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs());
1317#endif
1318BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs());
1319
1320static string http_text =
1321  "GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf"
1322  "alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1";
1323
1324void HTTPPartialMatchPCRE(int n) {
1325  StringPiece a;
1326  PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1327  for (int i = 0; i < n; i++) {
1328    PCRE::PartialMatch(http_text, re, &a);
1329  }
1330}
1331
1332void HTTPPartialMatchRE2(int n) {
1333  StringPiece a;
1334  RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1335  for (int i = 0; i < n; i++) {
1336    RE2::PartialMatch(http_text, re, &a);
1337  }
1338}
1339
1340#ifdef USEPCRE
1341BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1342#endif
1343BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1344
1345static string http_smalltext =
1346  "GET /abc HTTP/1.1";
1347
1348void SmallHTTPPartialMatchPCRE(int n) {
1349  StringPiece a;
1350  PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1351  for (int i = 0; i < n; i++) {
1352    PCRE::PartialMatch(http_text, re, &a);
1353  }
1354}
1355
1356void SmallHTTPPartialMatchRE2(int n) {
1357  StringPiece a;
1358  RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1359  for (int i = 0; i < n; i++) {
1360    RE2::PartialMatch(http_text, re, &a);
1361  }
1362}
1363
1364#ifdef USEPCRE
1365BENCHMARK(SmallHTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1366#endif
1367BENCHMARK(SmallHTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1368
1369void DotMatchPCRE(int n) {
1370  StringPiece a;
1371  PCRE re("(?-s)^(.+)");
1372  for (int i = 0; i < n; i++) {
1373    PCRE::PartialMatch(http_text, re, &a);
1374  }
1375}
1376
1377void DotMatchRE2(int n) {
1378  StringPiece a;
1379  RE2 re("(?-s)^(.+)");
1380  for (int i = 0; i < n; i++) {
1381    RE2::PartialMatch(http_text, re, &a);
1382  }
1383}
1384
1385#ifdef USEPCRE
1386BENCHMARK(DotMatchPCRE)->ThreadRange(1, NumCPUs());
1387#endif
1388BENCHMARK(DotMatchRE2)->ThreadRange(1, NumCPUs());
1389
1390void ASCIIMatchPCRE(int n) {
1391  StringPiece a;
1392  PCRE re("(?-s)^([ -~]+)");
1393  for (int i = 0; i < n; i++) {
1394    PCRE::PartialMatch(http_text, re, &a);
1395  }
1396}
1397
1398void ASCIIMatchRE2(int n) {
1399  StringPiece a;
1400  RE2 re("(?-s)^([ -~]+)");
1401  for (int i = 0; i < n; i++) {
1402    RE2::PartialMatch(http_text, re, &a);
1403  }
1404}
1405
1406#ifdef USEPCRE
1407BENCHMARK(ASCIIMatchPCRE)->ThreadRange(1, NumCPUs());
1408#endif
1409BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs());
1410
1411void FullMatchPCRE(int iter, int n, const char *regexp) {
1412  StopBenchmarkTiming();
1413  string s;
1414  MakeText(&s, n);
1415  s += "ABCDEFGHIJ";
1416  BenchmarkMemoryUsage();
1417  PCRE re(regexp);
1418  StartBenchmarkTiming();
1419  for (int i = 0; i < iter; i++)
1420    CHECK(PCRE::FullMatch(s, re));
1421  SetBenchmarkBytesProcessed(static_cast<int64>(iter)*n);
1422}
1423
1424void FullMatchRE2(int iter, int n, const char *regexp) {
1425  StopBenchmarkTiming();
1426  string s;
1427  MakeText(&s, n);
1428  s += "ABCDEFGHIJ";
1429  BenchmarkMemoryUsage();
1430  RE2 re(regexp, RE2::Latin1);
1431  StartBenchmarkTiming();
1432  for (int i = 0; i < iter; i++)
1433    CHECK(RE2::FullMatch(s, re));
1434  SetBenchmarkBytesProcessed(static_cast<int64>(iter)*n);
1435}
1436
1437void FullMatch_DotStar_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s).*"); }
1438void FullMatch_DotStar_CachedRE2(int i, int n)  { FullMatchRE2(i, n, "(?s).*"); }
1439
1440void FullMatch_DotStarDollar_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s).*$"); }
1441void FullMatch_DotStarDollar_CachedRE2(int i, int n)  { FullMatchRE2(i, n, "(?s).*$"); }
1442
1443void FullMatch_DotStarCapture_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s)((.*)()()($))"); }
1444void FullMatch_DotStarCapture_CachedRE2(int i, int n)  { FullMatchRE2(i, n, "(?s)((.*)()()($))"); }
1445
1446#ifdef USEPCRE
1447BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20);
1448#endif
1449BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2,  8, 2<<20);
1450
1451#ifdef USEPCRE
1452BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20);
1453#endif
1454BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2,  8, 2<<20);
1455
1456#ifdef USEPCRE
1457BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20);
1458#endif
1459BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2,  8, 2<<20);
1460
1461}  // namespace re2
1462