10d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Copyright 2006 The RE2 Authors. All Rights Reserved. 20d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Use of this source code is governed by a BSD-style 30d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// license that can be found in the LICENSE file. 40d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 50d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Dump the regexp into a string showing structure. 60d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Tested by parse_unittest.cc 70d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 80d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// This function traverses the regexp recursively, 90d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// meaning that on inputs like Regexp::Simplify of 100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// a{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}, 110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// it takes time and space exponential in the size of the 120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// original regular expression. It can also use stack space 130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// linear in the size of the regular expression for inputs 140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// like ((((((((((((((((a*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*. 150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// IT IS NOT SAFE TO CALL FROM PRODUCTION CODE. 160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// As a result, Dump is provided only in the testing 170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// library (see BUILD). 180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include <string> 200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include <vector> 210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "util/test.h" 220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/stringpiece.h" 230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/regexp.h" 240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Cause a link error if this file is used outside of testing. 260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinDECLARE_string(test_tmpdir); 270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinnamespace re2 { 290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstatic const char* kOpcodeNames[] = { 310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "bad", 320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "no", 330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "emp", 340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "lit", 350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "str", 360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "cat", 370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "alt", 380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "star", 390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "plus", 400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "que", 410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "rep", 420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "cap", 430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "dot", 440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "byte", 450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "bol", 460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "eol", 470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "wb", // kRegexpWordBoundary 480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "nwb", // kRegexpNoWordBoundary 490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "bot", 500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "eot", 510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "cc", 520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin "match", 530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}; 540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Create string representation of regexp with explicit structure. 560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Nothing pretty, just for testing. 570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstatic void DumpRegexpAppending(Regexp* re, string* s) { 580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (re->op() < 0 || re->op() >= arraysize(kOpcodeNames)) { 590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin StringAppendF(s, "op%d", re->op()); 600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } else { 610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin switch (re->op()) { 620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin default: 630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin break; 640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kRegexpStar: 650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kRegexpPlus: 660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kRegexpQuest: 670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kRegexpRepeat: 680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (re->parse_flags() & Regexp::NonGreedy) 690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s->append("n"); 700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin break; 710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s->append(kOpcodeNames[re->op()]); 730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (re->op() == kRegexpLiteral && (re->parse_flags() & Regexp::FoldCase)) { 740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Rune r = re->rune(); 750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if ('a' <= r && r <= 'z') 760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s->append("fold"); 770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (re->op() == kRegexpLiteralString && (re->parse_flags() & Regexp::FoldCase)) { 790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int i = 0; i < re->nrunes(); i++) { 800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Rune r = re->runes()[i]; 810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if ('a' <= r && r <= 'z') { 820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s->append("fold"); 830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin break; 840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s->append("{"); 890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin switch (re->op()) { 900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin default: 910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin break; 920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kRegexpEndText: 930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (!(re->parse_flags() & Regexp::WasDollar)) { 940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s->append("\\z"); 950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 960d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin break; 970d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kRegexpLiteral: { 980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Rune r = re->rune(); 990d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin char buf[UTFmax+1]; 1000d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin buf[runetochar(buf, &r)] = 0; 1010d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s->append(buf); 1020d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin break; 1030d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1040d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kRegexpLiteralString: 1050d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int i = 0; i < re->nrunes(); i++) { 1060d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Rune r = re->runes()[i]; 1070d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin char buf[UTFmax+1]; 1080d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin buf[runetochar(buf, &r)] = 0; 1090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s->append(buf); 1100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin break; 1120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kRegexpConcat: 1130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kRegexpAlternate: 1140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int i = 0; i < re->nsub(); i++) 1150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin DumpRegexpAppending(re->sub()[i], s); 1160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin break; 1170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kRegexpStar: 1180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kRegexpPlus: 1190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kRegexpQuest: 1200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin DumpRegexpAppending(re->sub()[0], s); 1210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin break; 1220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kRegexpCapture: 1230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (re->name()) { 1240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s->append(*re->name()); 1250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s->append(":"); 1260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin DumpRegexpAppending(re->sub()[0], s); 1280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin break; 1290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kRegexpRepeat: 1300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s->append(StringPrintf("%d,%d ", re->min(), re->max())); 1310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin DumpRegexpAppending(re->sub()[0], s); 1320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin break; 1330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kRegexpCharClass: { 1340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin string sep; 1350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (CharClass::iterator it = re->cc()->begin(); 1360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin it != re->cc()->end(); ++it) { 1370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin RuneRange rr = *it; 1380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s->append(sep); 1390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (rr.lo == rr.hi) 1400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s->append(StringPrintf("%#x", rr.lo)); 1410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin else 1420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s->append(StringPrintf("%#x-%#x", rr.lo, rr.hi)); 1430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin sep = " "; 1440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin break; 1460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin s->append("}"); 1490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 1500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstring Regexp::Dump() { 1520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin string s; 1530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Make sure being called from a unit test. 1550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (FLAGS_test_tmpdir.empty()) { 1560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin LOG(ERROR) << "Cannot use except for testing."; 1570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return s; 1580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin DumpRegexpAppending(this, &s); 1610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return s; 1620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 1630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} // namespace re2 165