dfa_test.cc revision 0d4c52358a1af421705c54bd8a9fdd8a30558a2e
10d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Copyright 2006-2008 The RE2 Authors.  All Rights Reserved.
20d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Use of this source code is governed by a BSD-style
30d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// license that can be found in the LICENSE file.
40d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
50d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "util/test.h"
60d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "util/thread.h"
70d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/prog.h"
80d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/re2.h"
90d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/regexp.h"
100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/testing/regexp_generator.h"
110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/testing/string_generator.h"
120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinDECLARE_bool(re2_dfa_bail_when_slow);
140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinDEFINE_int32(size, 8, "log2(number of DFA nodes)");
160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinDEFINE_int32(repeat, 2, "Repetition count.");
170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinDEFINE_int32(threads, 4, "number of threads");
180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinnamespace re2 {
200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Check that multithreaded access to DFA class works.
220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Helper thread: builds entire DFA for prog.
240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinclass BuildThread : public Thread {
250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin public:
260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  BuildThread(Prog* prog) : prog_(prog) {}
270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  virtual void Run() {
280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    CHECK(prog_->BuildEntireDFA(Prog::kFirstMatch));
290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin private:
320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  Prog* prog_;
330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin};
340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(Multithreaded, BuildEntireDFA) {
360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Create regexp with 2^FLAGS_size states in DFA.
370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  string s = "a";
380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  for (int i = 0; i < FLAGS_size; i++)
390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    s += "[ab]";
400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  s += "b";
410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Check that single-threaded code works.
430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  {
440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    //LOG(INFO) << s;
450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    Regexp* re = Regexp::Parse(s.c_str(), Regexp::LikePerl, NULL);
460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    CHECK(re);
470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    Prog* prog = re->CompileToProg(0);
480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    CHECK(prog);
490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    BuildThread* t = new BuildThread(prog);
500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    t->SetJoinable(true);
510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    t->Start();
520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    t->Join();
530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    delete t;
540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    delete prog;
550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    re->Decref();
560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Build the DFA simultaneously in a bunch of threads.
590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  for (int i = 0; i < FLAGS_repeat; i++) {
600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    Regexp* re = Regexp::Parse(s.c_str(), Regexp::LikePerl, NULL);
610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    CHECK(re);
620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    Prog* prog = re->CompileToProg(0);
630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    CHECK(prog);
640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    vector<BuildThread*> threads;
660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    for (int j = 0; j < FLAGS_threads; j++) {
670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      BuildThread *t = new BuildThread(prog);
680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      t->SetJoinable(true);
690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      threads.push_back(t);
700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    for (int j = 0; j < FLAGS_threads; j++)
720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      threads[j]->Start();
730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    for (int j = 0; j < FLAGS_threads; j++) {
740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      threads[j]->Join();
750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      delete threads[j];
760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    // One more compile, to make sure everything is okay.
790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    prog->BuildEntireDFA(Prog::kFirstMatch);
800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    delete prog;
810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    re->Decref();
820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Check that DFA size requirements are followed.
860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// BuildEntireDFA will, like SearchDFA, stop building out
870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// the DFA once the memory limits are reached.
880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(SingleThreaded, BuildEntireDFA) {
890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Create regexp with 2^30 states in DFA.
900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  string s = "a";
910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  for (int i = 0; i < 30; i++)
920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    s += "[ab]";
930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  s += "b";
940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  //LOG(INFO) << s;
960d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  Regexp* re = Regexp::Parse(s.c_str(), Regexp::LikePerl, NULL);
970d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  CHECK(re);
980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  int max = 24;
990d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  for (int i = 17; i < max; i++) {
1000d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    int limit = 1<<i;
1010d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    int usage;
1020d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    //int progusage, dfamem;
1030d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    {
1040d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      testing::MallocCounter m(testing::MallocCounter::THIS_THREAD_ONLY);
1050d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      Prog* prog = re->CompileToProg(limit);
1060d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      CHECK(prog);
1070d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      //progusage = m.HeapGrowth();
1080d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      //dfamem = prog->dfa_mem();
1090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      prog->BuildEntireDFA(Prog::kFirstMatch);
1100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      prog->BuildEntireDFA(Prog::kLongestMatch);
1110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      usage = m.HeapGrowth();
1120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      delete prog;
1130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
1140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (!UsingMallocCounter)
1150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      continue;
1160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    //LOG(INFO) << StringPrintf("Limit %d: prog used %d, DFA budget %d, total %d\n",
1170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    //                          limit, progusage, dfamem, usage);
1180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    CHECK_GT(usage, limit*9/10);
1190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    CHECK_LT(usage, limit + (16<<10));  // 16kB of slop okay
1200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
1210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  re->Decref();
1220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
1230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Generates and returns a string over binary alphabet {0,1} that contains
1250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// all possible binary sequences of length n as subsequences.  The obvious
1260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// brute force method would generate a string of length n * 2^n, but this
1270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// generates a string of length n + 2^n - 1 called a De Bruijn cycle.
1280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
1290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Such a string is useful for testing a DFA.  If you have a DFA
1300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// where distinct last n bytes implies distinct states, then running on a
1310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// DeBruijn string causes the DFA to need to create a new state at every
1320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// position in the input, never reusing any states until it gets to the
1330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// end of the string.  This is the worst possible case for DFA execution.
1340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstatic string DeBruijnString(int n) {
1350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  CHECK_LT(n, 8*sizeof(int));
1360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  CHECK_GT(n, 0);
1370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  vector<bool> did(1<<n);
1390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  for (int i = 0; i < 1<<n; i++)
1400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    did[i] = false;
1410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  string s;
1430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  for (int i = 0; i < n-1; i++)
1440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    s.append("0");
1450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  int bits = 0;
1460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  int mask = (1<<n) - 1;
1470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  for (int i = 0; i < (1<<n); i++) {
1480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    bits <<= 1;
1490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    bits &= mask;
1500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (!did[bits|1]) {
1510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      bits |= 1;
1520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      s.append("1");
1530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    } else {
1540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      s.append("0");
1550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
1560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    CHECK(!did[bits]);
1570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    did[bits] = true;
1580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
1590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  return s;
1600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
1610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Test that the DFA gets the right result even if it runs
1630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// out of memory during a search.  The regular expression
1640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// 0[01]{n}$ matches a binary string of 0s and 1s only if
1650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// the (n+1)th-to-last character is a 0.  Matching this in
1660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// a single forward pass (as done by the DFA) requires
1670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// keeping one bit for each of the last n+1 characters
1680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// (whether each was a 0), or 2^(n+1) possible states.
1690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// If we run this regexp to search in a string that contains
1700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// every possible n-character binary string as a substring,
1710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// then it will have to run through at least 2^n states.
1720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// States are big data structures -- certainly more than 1 byte --
1730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// so if the DFA can search correctly while staying within a
1740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// 2^n byte limit, it must be handling out-of-memory conditions
1750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// gracefully.
1760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(SingleThreaded, SearchDFA) {
1770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Choice of n is mostly arbitrary, except that:
1780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  //   * making n too big makes the test run for too long.
1790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  //   * making n too small makes the DFA refuse to run,
1800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  //     because it has so little memory compared to the program size.
1810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Empirically, n = 18 is a good compromise between the two.
1820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  const int n = 18;
1830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  Regexp* re = Regexp::Parse(StringPrintf("0[01]{%d}$", n),
1850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                             Regexp::LikePerl, NULL);
1860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  CHECK(re);
1870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // The De Bruijn string for n ends with a 1 followed by n 0s in a row,
1890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // which is not a match for 0[01]{n}$.  Adding one more 0 is a match.
1900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  string no_match = DeBruijnString(n);
1910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  string match = no_match + "0";
1920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // The De Bruijn string is the worst case input for this regexp.
1940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // By default, the DFA will notice that it is flushing its cache
1950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // too frequently and will bail out early, so that RE2 can use the
1960d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // NFA implementation instead.  (The DFA loses its speed advantage
1970d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // if it can't get a good cache hit rate.)
1980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Tell the DFA to trudge along instead.
1990d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  FLAGS_re2_dfa_bail_when_slow = false;
2000d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2010d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  int64 usage;
2020d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  int64 peak_usage;
2030d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  {
2040d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    testing::MallocCounter m(testing::MallocCounter::THIS_THREAD_ONLY);
2050d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    Prog* prog = re->CompileToProg(1<<n);
2060d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    CHECK(prog);
2070d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    for (int i = 0; i < 10; i++) {
2080d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      bool matched, failed = false;
2090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      matched = prog->SearchDFA(match, NULL,
2100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                                Prog::kUnanchored, Prog::kFirstMatch,
2110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                                NULL, &failed, NULL);
2120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      CHECK(!failed);
2130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      CHECK(matched);
2140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      matched = prog->SearchDFA(no_match, NULL,
2150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                                Prog::kUnanchored, Prog::kFirstMatch,
2160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                                NULL, &failed, NULL);
2170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      CHECK(!failed);
2180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      CHECK(!matched);
2190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
2200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    usage = m.HeapGrowth();
2210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    peak_usage = m.PeakHeapGrowth();
2220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    delete prog;
2230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
2240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  re->Decref();
2250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (!UsingMallocCounter)
2270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    return;
2280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  //LOG(INFO) << "usage " << usage << " " << peak_usage;
2290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  CHECK_LT(usage, 1<<n);
2300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  CHECK_LT(peak_usage, 1<<n);
2310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
2320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Helper thread: searches for match, which should match,
2340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// and no_match, which should not.
2350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinclass SearchThread : public Thread {
2360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin public:
2370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  SearchThread(Prog* prog, const StringPiece& match,
2380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin               const StringPiece& no_match)
2390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    : prog_(prog), match_(match), no_match_(no_match) {}
2400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  virtual void Run() {
2420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    for (int i = 0; i < 2; i++) {
2430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      bool matched, failed = false;
2440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      matched = prog_->SearchDFA(match_, NULL,
2450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                                 Prog::kUnanchored, Prog::kFirstMatch,
2460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                                 NULL, &failed, NULL);
2470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      CHECK(!failed);
2480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      CHECK(matched);
2490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      matched = prog_->SearchDFA(no_match_, NULL,
2500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                                 Prog::kUnanchored, Prog::kFirstMatch,
2510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                                 NULL, &failed, NULL);
2520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      CHECK(!failed);
2530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      CHECK(!matched);
2540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
2550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
2560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin private:
2580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  Prog* prog_;
2590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  StringPiece match_;
2600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  StringPiece no_match_;
2610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin};
2620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(Multithreaded, SearchDFA) {
2640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Same as single-threaded test above.
2650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  const int n = 18;
2660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  Regexp* re = Regexp::Parse(StringPrintf("0[01]{%d}$", n),
2670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                             Regexp::LikePerl, NULL);
2680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  CHECK(re);
2690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  string no_match = DeBruijnString(n);
2700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  string match = no_match + "0";
2710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  FLAGS_re2_dfa_bail_when_slow = false;
2720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Check that single-threaded code works.
2740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  {
2750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    Prog* prog = re->CompileToProg(1<<n);
2760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    CHECK(prog);
2770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    SearchThread* t = new SearchThread(prog, match, no_match);
2780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    t->SetJoinable(true);
2790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    t->Start();
2800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    t->Join();
2810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    delete t;
2820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    delete prog;
2830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
2840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Run the search simultaneously in a bunch of threads.
2860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Reuse same flags for Multithreaded.BuildDFA above.
2870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  for (int i = 0; i < FLAGS_repeat; i++) {
2880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    //LOG(INFO) << "Search " << i;
2890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    Prog* prog = re->CompileToProg(1<<n);
2900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    CHECK(prog);
2910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    vector<SearchThread*> threads;
2930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    for (int j = 0; j < FLAGS_threads; j++) {
2940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      SearchThread *t = new SearchThread(prog, match, no_match);
2950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      t->SetJoinable(true);
2960d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      threads.push_back(t);
2970d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
2980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    for (int j = 0; j < FLAGS_threads; j++)
2990d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      threads[j]->Start();
3000d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    for (int j = 0; j < FLAGS_threads; j++) {
3010d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      threads[j]->Join();
3020d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      delete threads[j];
3030d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
3040d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    delete prog;
3050d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
3060d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  re->Decref();
3070d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
3080d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
3090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstruct ReverseTest {
3100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  const char *regexp;
3110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  const char *text;
3120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  bool match;
3130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin};
3140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
3150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Test that reverse DFA handles anchored/unanchored correctly.
3160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// It's in the DFA interface but not used by RE2.
3170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinReverseTest reverse_tests[] = {
3180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "\\A(a|b)", "abc", true },
3190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(a|b)\\z", "cba", true },
3200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "\\A(a|b)", "cba", false },
3210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  { "(a|b)\\z", "abc", false },
3220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin};
3230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
3240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(DFA, ReverseMatch) {
3250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  int nfail = 0;
3260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  for (int i = 0; i < arraysize(reverse_tests); i++) {
3270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    const ReverseTest& t = reverse_tests[i];
3280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL);
3290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    CHECK(re);
3300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    Prog *prog = re->CompileToReverseProg(0);
3310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    CHECK(prog);
3320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    bool failed = false;
3330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    bool matched = prog->SearchDFA(t.text, NULL, Prog::kUnanchored, Prog::kFirstMatch, NULL, &failed, NULL);
3340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (matched != t.match) {
3350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      LOG(ERROR) << t.regexp << " on " << t.text << ": want " << t.match;
3360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      nfail++;
3370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
3380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    delete prog;
3390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    re->Decref();
3400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
3410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  EXPECT_EQ(nfail, 0);
3420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
3430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
3440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}  // namespace re2
345