15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright 2006-2008 The RE2 Authors.  All Rights Reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// license that can be found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/test.h"
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/thread.h"
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/prog.h"
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/re2.h"
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/regexp.h"
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/testing/regexp_generator.h"
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/testing/string_generator.h"
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DECLARE_bool(re2_dfa_bail_when_slow);
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DEFINE_int32(size, 8, "log2(number of DFA nodes)");
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DEFINE_int32(repeat, 2, "Repetition count.");
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DEFINE_int32(threads, 4, "number of threads");
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace re2 {
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Check that multithreaded access to DFA class works.
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Helper thread: builds entire DFA for prog.
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class BuildThread : public Thread {
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BuildThread(Prog* prog) : prog_(prog) {}
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual void Run() {
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    CHECK(prog_->BuildEntireDFA(Prog::kFirstMatch));
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Prog* prog_;
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST(Multithreaded, BuildEntireDFA) {
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Create regexp with 2^FLAGS_size states in DFA.
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string s = "a";
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < FLAGS_size; i++)
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    s += "[ab]";
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  s += "b";
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Check that single-threaded code works.
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  {
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    //LOG(INFO) << s;
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Regexp* re = Regexp::Parse(s.c_str(), Regexp::LikePerl, NULL);
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    CHECK(re);
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Prog* prog = re->CompileToProg(0);
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    CHECK(prog);
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    BuildThread* t = new BuildThread(prog);
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    t->SetJoinable(true);
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    t->Start();
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    t->Join();
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete t;
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete prog;
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    re->Decref();
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Build the DFA simultaneously in a bunch of threads.
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < FLAGS_repeat; i++) {
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Regexp* re = Regexp::Parse(s.c_str(), Regexp::LikePerl, NULL);
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    CHECK(re);
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Prog* prog = re->CompileToProg(0);
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    CHECK(prog);
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    vector<BuildThread*> threads;
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (int j = 0; j < FLAGS_threads; j++) {
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      BuildThread *t = new BuildThread(prog);
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t->SetJoinable(true);
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      threads.push_back(t);
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (int j = 0; j < FLAGS_threads; j++)
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      threads[j]->Start();
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (int j = 0; j < FLAGS_threads; j++) {
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      threads[j]->Join();
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      delete threads[j];
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // One more compile, to make sure everything is okay.
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    prog->BuildEntireDFA(Prog::kFirstMatch);
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete prog;
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    re->Decref();
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Check that DFA size requirements are followed.
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// BuildEntireDFA will, like SearchDFA, stop building out
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the DFA once the memory limits are reached.
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST(SingleThreaded, BuildEntireDFA) {
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Create regexp with 2^30 states in DFA.
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string s = "a";
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < 30; i++)
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    s += "[ab]";
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  s += "b";
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //LOG(INFO) << s;
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Regexp* re = Regexp::Parse(s.c_str(), Regexp::LikePerl, NULL);
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CHECK(re);
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int max = 24;
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 17; i < max; i++) {
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int limit = 1<<i;
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int usage;
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    //int progusage, dfamem;
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    {
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      testing::MallocCounter m(testing::MallocCounter::THIS_THREAD_ONLY);
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      Prog* prog = re->CompileToProg(limit);
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      CHECK(prog);
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      //progusage = m.HeapGrowth();
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      //dfamem = prog->dfa_mem();
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      prog->BuildEntireDFA(Prog::kFirstMatch);
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      prog->BuildEntireDFA(Prog::kLongestMatch);
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      usage = m.HeapGrowth();
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      delete prog;
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!UsingMallocCounter)
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      continue;
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    //LOG(INFO) << StringPrintf("Limit %d: prog used %d, DFA budget %d, total %d\n",
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    //                          limit, progusage, dfamem, usage);
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    CHECK_GT(usage, limit*9/10);
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    CHECK_LT(usage, limit + (16<<10));  // 16kB of slop okay
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  re->Decref();
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Generates and returns a string over binary alphabet {0,1} that contains
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// all possible binary sequences of length n as subsequences.  The obvious
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// brute force method would generate a string of length n * 2^n, but this
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// generates a string of length n + 2^n - 1 called a De Bruijn cycle.
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Such a string is useful for testing a DFA.  If you have a DFA
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// where distinct last n bytes implies distinct states, then running on a
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// DeBruijn string causes the DFA to need to create a new state at every
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// position in the input, never reusing any states until it gets to the
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// end of the string.  This is the worst possible case for DFA execution.
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static string DeBruijnString(int n) {
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CHECK_LT(n, 8*sizeof(int));
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CHECK_GT(n, 0);
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  vector<bool> did(1<<n);
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < 1<<n; i++)
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    did[i] = false;
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string s;
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < n-1; i++)
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    s.append("0");
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int bits = 0;
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int mask = (1<<n) - 1;
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < (1<<n); i++) {
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bits <<= 1;
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bits &= mask;
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!did[bits|1]) {
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      bits |= 1;
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      s.append("1");
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      s.append("0");
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    CHECK(!did[bits]);
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    did[bits] = true;
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return s;
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Test that the DFA gets the right result even if it runs
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// out of memory during a search.  The regular expression
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 0[01]{n}$ matches a binary string of 0s and 1s only if
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the (n+1)th-to-last character is a 0.  Matching this in
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// a single forward pass (as done by the DFA) requires
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// keeping one bit for each of the last n+1 characters
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// (whether each was a 0), or 2^(n+1) possible states.
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// If we run this regexp to search in a string that contains
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// every possible n-character binary string as a substring,
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// then it will have to run through at least 2^n states.
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// States are big data structures -- certainly more than 1 byte --
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// so if the DFA can search correctly while staying within a
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 2^n byte limit, it must be handling out-of-memory conditions
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// gracefully.
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST(SingleThreaded, SearchDFA) {
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Choice of n is mostly arbitrary, except that:
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //   * making n too big makes the test run for too long.
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //   * making n too small makes the DFA refuse to run,
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //     because it has so little memory compared to the program size.
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Empirically, n = 18 is a good compromise between the two.
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int n = 18;
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Regexp* re = Regexp::Parse(StringPrintf("0[01]{%d}$", n),
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             Regexp::LikePerl, NULL);
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CHECK(re);
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The De Bruijn string for n ends with a 1 followed by n 0s in a row,
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // which is not a match for 0[01]{n}$.  Adding one more 0 is a match.
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string no_match = DeBruijnString(n);
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string match = no_match + "0";
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The De Bruijn string is the worst case input for this regexp.
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // By default, the DFA will notice that it is flushing its cache
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // too frequently and will bail out early, so that RE2 can use the
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // NFA implementation instead.  (The DFA loses its speed advantage
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // if it can't get a good cache hit rate.)
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Tell the DFA to trudge along instead.
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FLAGS_re2_dfa_bail_when_slow = false;
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int64 usage;
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int64 peak_usage;
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  {
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    testing::MallocCounter m(testing::MallocCounter::THIS_THREAD_ONLY);
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Prog* prog = re->CompileToProg(1<<n);
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    CHECK(prog);
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (int i = 0; i < 10; i++) {
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      bool matched, failed = false;
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      matched = prog->SearchDFA(match, NULL,
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                Prog::kUnanchored, Prog::kFirstMatch,
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                NULL, &failed, NULL);
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      CHECK(!failed);
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      CHECK(matched);
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      matched = prog->SearchDFA(no_match, NULL,
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                Prog::kUnanchored, Prog::kFirstMatch,
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                NULL, &failed, NULL);
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      CHECK(!failed);
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      CHECK(!matched);
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    usage = m.HeapGrowth();
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    peak_usage = m.PeakHeapGrowth();
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete prog;
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  re->Decref();
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!UsingMallocCounter)
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //LOG(INFO) << "usage " << usage << " " << peak_usage;
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CHECK_LT(usage, 1<<n);
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CHECK_LT(peak_usage, 1<<n);
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Helper thread: searches for match, which should match,
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// and no_match, which should not.
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class SearchThread : public Thread {
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  SearchThread(Prog* prog, const StringPiece& match,
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               const StringPiece& no_match)
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : prog_(prog), match_(match), no_match_(no_match) {}
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual void Run() {
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (int i = 0; i < 2; i++) {
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      bool matched, failed = false;
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      matched = prog_->SearchDFA(match_, NULL,
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 Prog::kUnanchored, Prog::kFirstMatch,
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 NULL, &failed, NULL);
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      CHECK(!failed);
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      CHECK(matched);
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      matched = prog_->SearchDFA(no_match_, NULL,
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 Prog::kUnanchored, Prog::kFirstMatch,
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 NULL, &failed, NULL);
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      CHECK(!failed);
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      CHECK(!matched);
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Prog* prog_;
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringPiece match_;
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringPiece no_match_;
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST(Multithreaded, SearchDFA) {
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Same as single-threaded test above.
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int n = 18;
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Regexp* re = Regexp::Parse(StringPrintf("0[01]{%d}$", n),
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             Regexp::LikePerl, NULL);
2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CHECK(re);
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string no_match = DeBruijnString(n);
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string match = no_match + "0";
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FLAGS_re2_dfa_bail_when_slow = false;
2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Check that single-threaded code works.
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  {
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Prog* prog = re->CompileToProg(1<<n);
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    CHECK(prog);
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SearchThread* t = new SearchThread(prog, match, no_match);
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    t->SetJoinable(true);
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    t->Start();
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    t->Join();
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete t;
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete prog;
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Run the search simultaneously in a bunch of threads.
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Reuse same flags for Multithreaded.BuildDFA above.
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < FLAGS_repeat; i++) {
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    //LOG(INFO) << "Search " << i;
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Prog* prog = re->CompileToProg(1<<n);
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    CHECK(prog);
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    vector<SearchThread*> threads;
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (int j = 0; j < FLAGS_threads; j++) {
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SearchThread *t = new SearchThread(prog, match, no_match);
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t->SetJoinable(true);
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      threads.push_back(t);
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (int j = 0; j < FLAGS_threads; j++)
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      threads[j]->Start();
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (int j = 0; j < FLAGS_threads; j++) {
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      threads[j]->Join();
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      delete threads[j];
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete prog;
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  re->Decref();
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct ReverseTest {
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char *regexp;
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char *text;
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool match;
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Test that reverse DFA handles anchored/unanchored correctly.
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// It's in the DFA interface but not used by RE2.
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ReverseTest reverse_tests[] = {
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\\A(a|b)", "abc", true },
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(a|b)\\z", "cba", true },
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "\\A(a|b)", "cba", false },
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  { "(a|b)\\z", "abc", false },
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TEST(DFA, ReverseMatch) {
3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int nfail = 0;
3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < arraysize(reverse_tests); i++) {
3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const ReverseTest& t = reverse_tests[i];
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL);
3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    CHECK(re);
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Prog *prog = re->CompileToReverseProg(0);
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    CHECK(prog);
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bool failed = false;
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bool matched = prog->SearchDFA(t.text, NULL, Prog::kUnanchored, Prog::kFirstMatch, NULL, &failed, NULL);
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (matched != t.match) {
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      LOG(ERROR) << t.regexp << " on " << t.text << ": want " << t.match;
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      nfail++;
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete prog;
3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    re->Decref();
3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_EQ(nfail, 0);
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace re2
345