// Copyright 2008 The RE2 Authors.  All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Comparative tester for regular expression matching.
// Checks all implementations against each other.
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef RE2_TESTING_TESTER_H__
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define RE2_TESTING_TESTER_H__
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
#include <vector>

#include "re2/stringpiece.h"
#include "re2/prog.h"
#include "re2/regexp.h"
#include "re2/re2.h"
#include "util/pcre.h"
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace re2 {
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class Regexp;
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Identifiers for every regexp engine the tester can drive.
// The enumerators are consecutive starting at zero, and kEngineMax is
// one past the last real engine, so it doubles as the engine count.
enum Engine {
  // Prog::BadSearchBacktrack
  kEngineBacktrack = 0,
  // Prog::SearchNFA
  kEngineNFA,
  // Prog::SearchDFA, only ask whether it matched
  kEngineDFA,
  // Prog::SearchDFA, ask for match[0]
  kEngineDFA1,
  // Prog::SearchOnePass, if applicable
  kEngineOnePass,
  // Prog::SearchBitState
  kEngineBitState,
  // RE2, all submatches
  kEngineRE2,
  // RE2, only ask for match[0]
  kEngineRE2a,
  // RE2, only ask whether it matched
  kEngineRE2b,
  // PCRE (util/pcre.h)
  kEnginePCRE,

  // Sentinel: not an engine, marks the end of the list.
  kEngineMax,
};
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Make normal math on the enum preserve the type.
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// By default, C++ doesn't define ++ on enum, and e+1 has type int.
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static inline void operator++(Engine& e, int unused) {
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  e = static_cast<Engine>(e+1);
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static inline Engine operator+(Engine e, int i) {
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return static_cast<Engine>(static_cast<int>(e)+i);
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// A TestInstance caches per-regexp state for a given
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// regular expression in a given configuration
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// (UTF-8 vs Latin1, longest vs first match, etc.).
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class TestInstance {
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  struct Result;
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TestInstance(const StringPiece& regexp, Prog::MatchKind kind,
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               Regexp::ParseFlags flags);
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ~TestInstance();
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Regexp::ParseFlags flags() { return flags_; }
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool error() { return error_; }
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Runs a single test case: search in text, which is in context,
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // using the given anchoring.
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool RunCase(const StringPiece& text, const StringPiece& context,
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               Prog::Anchor anchor);
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Runs a single search using the named engine type.
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void RunSearch(Engine type,
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 const StringPiece& text, const StringPiece& context,
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 Prog::Anchor anchor,
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 Result *result);
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void LogMatch(const char* prefix, Engine e, const StringPiece& text,
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                const StringPiece& context, Prog::Anchor anchor);
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const StringPiece& regexp_str_;   // regexp being tested
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Prog::MatchKind kind_;            // kind of match
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Regexp::ParseFlags flags_;        // flags for parsing regexp_str_
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool error_;                      // error during constructor?
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Regexp* regexp_;                  // parsed regexp
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int num_captures_;                // regexp_->NumCaptures() cached
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Prog* prog_;                      // compiled program
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Prog* rprog_;                     // compiled reverse program
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  PCRE* re_;                        // PCRE implementation
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  RE2* re2_;                        // RE2 implementation
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_EVIL_CONSTRUCTORS(TestInstance);
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// A group of TestInstances for all possible configurations.
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class Tester {
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  explicit Tester(const StringPiece& regexp);
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ~Tester();
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool error() { return error_; }
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Runs a single test case: search in text, which is in context,
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // using the given anchoring.
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool TestCase(const StringPiece& text, const StringPiece& context,
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                Prog::Anchor anchor);
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Run TestCase(text, text, anchor) for all anchoring modes.
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool TestInput(const StringPiece& text);
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Run TestCase(text, context, anchor) for all anchoring modes.
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool TestInputInContext(const StringPiece& text, const StringPiece& context);
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool error_;
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  vector<TestInstance*> v_;
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_EVIL_CONSTRUCTORS(Tester);
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Run all possible tests using regexp and text.
// Only declared in this header; the definition lives elsewhere.
// NOTE(review): the bool result presumably reports whether every test
// passed -- confirm against the implementation.
bool TestRegexpOnText(const StringPiece& regexp, const StringPiece& text);
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace re2
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif  // RE2_TESTING_TESTER_H__
122