dfa_test.cc revision 0d4c52358a1af421705c54bd8a9fdd8a30558a2e
10d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Copyright 2006-2008 The RE2 Authors. All Rights Reserved. 20d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Use of this source code is governed by a BSD-style 30d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// license that can be found in the LICENSE file. 40d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 50d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "util/test.h" 60d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "util/thread.h" 70d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/prog.h" 80d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/re2.h" 90d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/regexp.h" 100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/testing/regexp_generator.h" 110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/testing/string_generator.h" 120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinDECLARE_bool(re2_dfa_bail_when_slow); 140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinDEFINE_int32(size, 8, "log2(number of DFA nodes)"); 160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinDEFINE_int32(repeat, 2, "Repetition count."); 170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinDEFINE_int32(threads, 4, "number of threads"); 180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinnamespace re2 { 200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Check that multithreaded access to DFA class works. 220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Helper thread: builds entire DFA for prog. 240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinclass BuildThread : public Thread { 250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin public: 260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin BuildThread(Prog* prog) : prog_(prog) {} 270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin virtual void Run() { 280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(prog_->BuildEntireDFA(Prog::kFirstMatch)); 290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin private: 320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Prog* prog_; 330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}; 340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(Multithreaded, BuildEntireDFA) { 360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Create regexp with 2^FLAGS_size states in DFA. 370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin string s = "a"; 380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int i = 0; i < FLAGS_size; i++) 390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s += "[ab]"; 400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s += "b"; 410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Check that single-threaded code works. 430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { 440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin //LOG(INFO) << s; 450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Regexp* re = Regexp::Parse(s.c_str(), Regexp::LikePerl, NULL); 460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(re); 470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Prog* prog = re->CompileToProg(0); 480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(prog); 490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin BuildThread* t = new BuildThread(prog); 500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin t->SetJoinable(true); 510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin t->Start(); 520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin t->Join(); 530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin delete t; 540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin delete prog; 550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin re->Decref(); 560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Build the DFA simultaneously in a bunch of threads. 590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int i = 0; i < FLAGS_repeat; i++) { 600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Regexp* re = Regexp::Parse(s.c_str(), Regexp::LikePerl, NULL); 610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(re); 620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Prog* prog = re->CompileToProg(0); 630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(prog); 640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin vector<BuildThread*> threads; 660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int j = 0; j < FLAGS_threads; j++) { 670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin BuildThread *t = new BuildThread(prog); 680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin t->SetJoinable(true); 690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin threads.push_back(t); 700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int j = 0; j < FLAGS_threads; j++) 720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin threads[j]->Start(); 730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int j = 0; j < FLAGS_threads; j++) { 740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin threads[j]->Join(); 750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin delete threads[j]; 760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // One more compile, to make sure everything is okay. 790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin prog->BuildEntireDFA(Prog::kFirstMatch); 800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin delete prog; 810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin re->Decref(); 820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Check that DFA size requirements are followed. 860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// BuildEntireDFA will, like SearchDFA, stop building out 870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// the DFA once the memory limits are reached. 880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(SingleThreaded, BuildEntireDFA) { 890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Create regexp with 2^30 states in DFA. 900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin string s = "a"; 910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int i = 0; i < 30; i++) 920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s += "[ab]"; 930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s += "b"; 940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin //LOG(INFO) << s; 960d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Regexp* re = Regexp::Parse(s.c_str(), Regexp::LikePerl, NULL); 970d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(re); 980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int max = 24; 990d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int i = 17; i < max; i++) { 1000d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int limit = 1<<i; 1010d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int usage; 1020d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin //int progusage, dfamem; 1030d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { 1040d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin testing::MallocCounter m(testing::MallocCounter::THIS_THREAD_ONLY); 1050d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Prog* prog = re->CompileToProg(limit); 1060d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(prog); 1070d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin //progusage = m.HeapGrowth(); 1080d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin //dfamem = prog->dfa_mem(); 1090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin prog->BuildEntireDFA(Prog::kFirstMatch); 1100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin prog->BuildEntireDFA(Prog::kLongestMatch); 1110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin usage = m.HeapGrowth(); 1120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin delete prog; 1130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (!UsingMallocCounter) 1150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin continue; 1160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin //LOG(INFO) << StringPrintf("Limit %d: prog used %d, DFA budget %d, total %d\n", 1170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // limit, progusage, dfamem, usage); 1180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK_GT(usage, limit*9/10); 1190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK_LT(usage, limit + (16<<10)); // 16kB of slop okay 1200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin re->Decref(); 1220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 1230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Generates and returns a string over binary alphabet {0,1} that contains 1250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// all possible binary sequences of length n as subsequences. The obvious 1260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// brute force method would generate a string of length n * 2^n, but this 1270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// generates a string of length n + 2^n - 1 called a De Bruijn cycle. 1280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17. 1290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Such a string is useful for testing a DFA. If you have a DFA 1300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// where distinct last n bytes implies distinct states, then running on a 1310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// DeBruijn string causes the DFA to need to create a new state at every 1320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// position in the input, never reusing any states until it gets to the 1330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// end of the string. This is the worst possible case for DFA execution. 1340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstatic string DeBruijnString(int n) { 1350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK_LT(n, 8*sizeof(int)); 1360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK_GT(n, 0); 1370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin vector<bool> did(1<<n); 1390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int i = 0; i < 1<<n; i++) 1400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin did[i] = false; 1410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin string s; 1430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int i = 0; i < n-1; i++) 1440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s.append("0"); 1450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int bits = 0; 1460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int mask = (1<<n) - 1; 1470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int i = 0; i < (1<<n); i++) { 1480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin bits <<= 1; 1490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin bits &= mask; 1500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (!did[bits|1]) { 1510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin bits |= 1; 1520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s.append("1"); 1530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } else { 1540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s.append("0"); 1550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(!did[bits]); 1570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin did[bits] = true; 1580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return s; 1600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 1610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Test that the DFA gets the right result even if it runs 1630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// out of memory during a search. The regular expression 1640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// 0[01]{n}$ matches a binary string of 0s and 1s only if 1650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// the (n+1)th-to-last character is a 0. Matching this in 1660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// a single forward pass (as done by the DFA) requires 1670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// keeping one bit for each of the last n+1 characters 1680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// (whether each was a 0), or 2^(n+1) possible states. 1690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// If we run this regexp to search in a string that contains 1700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// every possible n-character binary string as a substring, 1710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// then it will have to run through at least 2^n states. 1720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// States are big data structures -- certainly more than 1 byte -- 1730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// so if the DFA can search correctly while staying within a 1740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// 2^n byte limit, it must be handling out-of-memory conditions 1750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// gracefully. 1760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(SingleThreaded, SearchDFA) { 1770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Choice of n is mostly arbitrary, except that: 1780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // * making n too big makes the test run for too long. 1790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // * making n too small makes the DFA refuse to run, 1800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // because it has so little memory compared to the program size. 1810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Empirically, n = 18 is a good compromise between the two. 1820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const int n = 18; 1830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Regexp* re = Regexp::Parse(StringPrintf("0[01]{%d}$", n), 1850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Regexp::LikePerl, NULL); 1860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(re); 1870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // The De Bruijn string for n ends with a 1 followed by n 0s in a row, 1890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // which is not a match for 0[01]{n}$. Adding one more 0 is a match. 1900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin string no_match = DeBruijnString(n); 1910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin string match = no_match + "0"; 1920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // The De Bruijn string is the worst case input for this regexp. 1940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // By default, the DFA will notice that it is flushing its cache 1950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // too frequently and will bail out early, so that RE2 can use the 1960d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // NFA implementation instead. (The DFA loses its speed advantage 1970d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // if it can't get a good cache hit rate.) 1980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Tell the DFA to trudge along instead. 1990d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin FLAGS_re2_dfa_bail_when_slow = false; 2000d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2010d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int64 usage; 2020d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int64 peak_usage; 2030d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { 2040d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin testing::MallocCounter m(testing::MallocCounter::THIS_THREAD_ONLY); 2050d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Prog* prog = re->CompileToProg(1<<n); 2060d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(prog); 2070d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int i = 0; i < 10; i++) { 2080d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin bool matched, failed = false; 2090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin matched = prog->SearchDFA(match, NULL, 2100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Prog::kUnanchored, Prog::kFirstMatch, 2110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin NULL, &failed, NULL); 2120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(!failed); 2130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(matched); 2140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin matched = prog->SearchDFA(no_match, NULL, 2150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Prog::kUnanchored, Prog::kFirstMatch, 2160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin NULL, &failed, NULL); 2170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(!failed); 2180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(!matched); 2190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin usage = m.HeapGrowth(); 2210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin peak_usage = m.PeakHeapGrowth(); 2220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin delete prog; 2230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin re->Decref(); 2250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (!UsingMallocCounter) 2270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return; 2280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin //LOG(INFO) << "usage " << usage << " " << peak_usage; 2290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK_LT(usage, 1<<n); 2300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK_LT(peak_usage, 1<<n); 2310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 2320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Helper thread: searches for match, which should match, 2340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// and no_match, which should not. 2350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinclass SearchThread : public Thread { 2360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin public: 2370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin SearchThread(Prog* prog, const StringPiece& match, 2380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const StringPiece& no_match) 2390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin : prog_(prog), match_(match), no_match_(no_match) {} 2400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin virtual void Run() { 2420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int i = 0; i < 2; i++) { 2430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin bool matched, failed = false; 2440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin matched = prog_->SearchDFA(match_, NULL, 2450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Prog::kUnanchored, Prog::kFirstMatch, 2460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin NULL, &failed, NULL); 2470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(!failed); 2480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(matched); 2490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin matched = prog_->SearchDFA(no_match_, NULL, 2500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Prog::kUnanchored, Prog::kFirstMatch, 2510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin NULL, &failed, NULL); 2520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(!failed); 2530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(!matched); 2540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin private: 2580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Prog* prog_; 2590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin StringPiece match_; 2600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin StringPiece no_match_; 2610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}; 2620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(Multithreaded, SearchDFA) { 2640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Same as single-threaded test above. 2650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const int n = 18; 2660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Regexp* re = Regexp::Parse(StringPrintf("0[01]{%d}$", n), 2670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Regexp::LikePerl, NULL); 2680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(re); 2690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin string no_match = DeBruijnString(n); 2700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin string match = no_match + "0"; 2710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin FLAGS_re2_dfa_bail_when_slow = false; 2720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Check that single-threaded code works. 2740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { 2750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Prog* prog = re->CompileToProg(1<<n); 2760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(prog); 2770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin SearchThread* t = new SearchThread(prog, match, no_match); 2780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin t->SetJoinable(true); 2790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin t->Start(); 2800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin t->Join(); 2810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin delete t; 2820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin delete prog; 2830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Run the search simultaneously in a bunch of threads. 2860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Reuse same flags for Multithreaded.BuildDFA above. 2870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int i = 0; i < FLAGS_repeat; i++) { 2880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin //LOG(INFO) << "Search " << i; 2890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Prog* prog = re->CompileToProg(1<<n); 2900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(prog); 2910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin vector<SearchThread*> threads; 2930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int j = 0; j < FLAGS_threads; j++) { 2940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin SearchThread *t = new SearchThread(prog, match, no_match); 2950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin t->SetJoinable(true); 2960d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin threads.push_back(t); 2970d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int j = 0; j < FLAGS_threads; j++) 2990d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin threads[j]->Start(); 3000d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int j = 0; j < FLAGS_threads; j++) { 3010d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin threads[j]->Join(); 3020d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin delete threads[j]; 3030d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 3040d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin delete prog; 3050d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 3060d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin re->Decref(); 3070d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 3080d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 3090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstruct ReverseTest { 3100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const char *regexp; 3110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const char *text; 3120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin bool match; 3130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}; 3140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 3150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Test that reverse DFA handles anchored/unanchored correctly. 3160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// It's in the DFA interface but not used by RE2. 3170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinReverseTest reverse_tests[] = { 3180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "\\A(a|b)", "abc", true }, 3190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(a|b)\\z", "cba", true }, 3200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "\\A(a|b)", "cba", false }, 3210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin { "(a|b)\\z", "abc", false }, 3220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}; 3230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 3240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinTEST(DFA, ReverseMatch) { 3250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int nfail = 0; 3260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int i = 0; i < arraysize(reverse_tests); i++) { 3270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const ReverseTest& t = reverse_tests[i]; 3280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL); 3290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(re); 3300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Prog *prog = re->CompileToReverseProg(0); 3310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(prog); 3320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin bool failed = false; 3330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin bool matched = prog->SearchDFA(t.text, NULL, Prog::kUnanchored, Prog::kFirstMatch, NULL, &failed, NULL); 3340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (matched != t.match) { 3350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin LOG(ERROR) << t.regexp << " on " << t.text << ": want " << t.match; 3360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin nfail++; 3370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 3380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin delete prog; 3390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin re->Decref(); 3400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 3410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin EXPECT_EQ(nfail, 0); 3420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 3430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 3440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} // namespace re2 345