// Copyright 2008 The RE2 Authors.  All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Comparative tester for regular expression matching.
// Checks all implementations against each other.

#ifndef RE2_TESTING_TESTER_H__
#define RE2_TESTING_TESTER_H__

#include <vector>

#include "re2/stringpiece.h"
#include "re2/prog.h"
#include "re2/regexp.h"
#include "re2/re2.h"
#include "util/pcre.h"

namespace re2 {

class Regexp;

// All the supported regexp engines.
// Each enumerator names one way of running a search; the trailing comment
// gives the entry point it corresponds to.  Values are consecutive starting
// at 0, so an Engine can be used as a loop counter or array index.
enum Engine {
  kEngineBacktrack = 0,  // Prog::BadSearchBacktrack
  kEngineNFA,            // Prog::SearchNFA
  kEngineDFA,            // Prog::SearchDFA, only ask whether it matched
  kEngineDFA1,           // Prog::SearchDFA, ask for match[0]
  kEngineOnePass,        // Prog::SearchOnePass, if applicable
  kEngineBitState,       // Prog::SearchBitState
  kEngineRE2,            // RE2, all submatches
  kEngineRE2a,           // RE2, only ask for match[0]
  kEngineRE2b,           // RE2, only ask whether it matched
  kEnginePCRE,           // PCRE (util/pcre.h)

  kEngineMax,            // one past the last engine (== number of engines)
};

// Make normal math on the enum preserve the type.
// By default, C++ doesn't define ++ on enum, and e+1 has type int.
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static inline void operator++(Engine& e, int unused) { 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) e = static_cast<Engine>(e+1); 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static inline Engine operator+(Engine e, int i) { 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return static_cast<Engine>(static_cast<int>(e)+i); 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// A TestInstance caches per-regexp state for a given 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// regular expression in a given configuration 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// (UTF-8 vs Latin1, longest vs first match, etc.). 
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class TestInstance { 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) struct Result; 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) TestInstance(const StringPiece& regexp, Prog::MatchKind kind, 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Regexp::ParseFlags flags); 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ~TestInstance(); 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Regexp::ParseFlags flags() { return flags_; } 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool error() { return error_; } 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Runs a single test case: search in text, which is in context, 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // using the given anchoring. 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool RunCase(const StringPiece& text, const StringPiece& context, 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Prog::Anchor anchor); 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Runs a single search using the named engine type. 
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void RunSearch(Engine type, 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const StringPiece& text, const StringPiece& context, 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Prog::Anchor anchor, 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Result *result); 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void LogMatch(const char* prefix, Engine e, const StringPiece& text, 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const StringPiece& context, Prog::Anchor anchor); 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const StringPiece& regexp_str_; // regexp being tested 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Prog::MatchKind kind_; // kind of match 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Regexp::ParseFlags flags_; // flags for parsing regexp_str_ 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool error_; // error during constructor? 
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Regexp* regexp_; // parsed regexp 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int num_captures_; // regexp_->NumCaptures() cached 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Prog* prog_; // compiled program 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Prog* rprog_; // compiled reverse program 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) PCRE* re_; // PCRE implementation 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) RE2* re2_; // RE2 implementation 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DISALLOW_EVIL_CONSTRUCTORS(TestInstance); 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// A group of TestInstances for all possible configurations. 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class Tester { 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) explicit Tester(const StringPiece& regexp); 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ~Tester(); 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool error() { return error_; } 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Runs a single test case: search in text, which is in context, 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // using the given anchoring. 
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool TestCase(const StringPiece& text, const StringPiece& context, 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Prog::Anchor anchor); 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Run TestCase(text, text, anchor) for all anchoring modes. 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool TestInput(const StringPiece& text); 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Run TestCase(text, context, anchor) for all anchoring modes. 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool TestInputInContext(const StringPiece& text, const StringPiece& context); 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool error_; 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) vector<TestInstance*> v_; 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DISALLOW_EVIL_CONSTRUCTORS(Tester); 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Run all possible tests using regexp and text. 
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool TestRegexpOnText(const StringPiece& regexp, const StringPiece& text); 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace re2 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif // RE2_TESTING_TESTER_H__ 122