1// Copyright 2006 The RE2 Authors. All Rights Reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// Dump the regexp into a string showing structure. 6// Tested by parse_unittest.cc 7 8// This function traverses the regexp recursively, 9// meaning that on inputs like Regexp::Simplify of 10// a{100}{100}{100}{100}{100}{100}{100}{100}{100}{100}, 11// it takes time and space exponential in the size of the 12// original regular expression. It can also use stack space 13// linear in the size of the regular expression for inputs 14// like ((((((((((((((((a*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*. 15// IT IS NOT SAFE TO CALL FROM PRODUCTION CODE. 16// As a result, Dump is provided only in the testing 17// library (see BUILD). 18 19#include <string> 20#include <vector> 21#include "util/test.h" 22#include "re2/stringpiece.h" 23#include "re2/regexp.h" 24 25// Cause a link error if this file is used outside of testing. 26DECLARE_string(test_tmpdir); 27 28namespace re2 { 29 30static const char* kOpcodeNames[] = { 31 "bad", 32 "no", 33 "emp", 34 "lit", 35 "str", 36 "cat", 37 "alt", 38 "star", 39 "plus", 40 "que", 41 "rep", 42 "cap", 43 "dot", 44 "byte", 45 "bol", 46 "eol", 47 "wb", // kRegexpWordBoundary 48 "nwb", // kRegexpNoWordBoundary 49 "bot", 50 "eot", 51 "cc", 52 "match", 53}; 54 55// Create string representation of regexp with explicit structure. 56// Nothing pretty, just for testing. 57static void DumpRegexpAppending(Regexp* re, string* s) { 58 if (re->op() < 0 || re->op() >= arraysize(kOpcodeNames)) { 59 StringAppendF(s, "op%d", re->op()); 60 } else { 61 switch (re->op()) { 62 default: 63 break; 64 case kRegexpStar: 65 case kRegexpPlus: 66 case kRegexpQuest: 67 case kRegexpRepeat: 68 if (re->parse_flags() & Regexp::NonGreedy) 69 s->append("n"); 70 break; 71 } 72 s->append(kOpcodeNames[re->op()]); 73 if (re->op() == kRegexpLiteral && (re->parse_flags() & Regexp::FoldCase)) { 74 Rune r = re->rune(); 75 if ('a' <= r && r <= 'z') 76 s->append("fold"); 77 } 78 if (re->op() == kRegexpLiteralString && (re->parse_flags() & Regexp::FoldCase)) { 79 for (int i = 0; i < re->nrunes(); i++) { 80 Rune r = re->runes()[i]; 81 if ('a' <= r && r <= 'z') { 82 s->append("fold"); 83 break; 84 } 85 } 86 } 87 } 88 s->append("{"); 89 switch (re->op()) { 90 default: 91 break; 92 case kRegexpEndText: 93 if (!(re->parse_flags() & Regexp::WasDollar)) { 94 s->append("\\z"); 95 } 96 break; 97 case kRegexpLiteral: { 98 Rune r = re->rune(); 99 char buf[UTFmax+1]; 100 buf[runetochar(buf, &r)] = 0; 101 s->append(buf); 102 break; 103 } 104 case kRegexpLiteralString: 105 for (int i = 0; i < re->nrunes(); i++) { 106 Rune r = re->runes()[i]; 107 char buf[UTFmax+1]; 108 buf[runetochar(buf, &r)] = 0; 109 s->append(buf); 110 } 111 break; 112 case kRegexpConcat: 113 case kRegexpAlternate: 114 for (int i = 0; i < re->nsub(); i++) 115 DumpRegexpAppending(re->sub()[i], s); 116 break; 117 case kRegexpStar: 118 case kRegexpPlus: 119 case kRegexpQuest: 120 DumpRegexpAppending(re->sub()[0], s); 121 break; 122 case kRegexpCapture: 123 if (re->name()) { 124 s->append(*re->name()); 125 s->append(":"); 126 } 127 DumpRegexpAppending(re->sub()[0], s); 128 break; 129 case kRegexpRepeat: 130 s->append(StringPrintf("%d,%d ", re->min(), re->max())); 131 DumpRegexpAppending(re->sub()[0], s); 132 break; 133 case kRegexpCharClass: { 134 string sep; 135 for (CharClass::iterator it = re->cc()->begin(); 136 it != re->cc()->end(); ++it) { 137 RuneRange rr = *it; 138 s->append(sep); 139 if (rr.lo == rr.hi) 140 s->append(StringPrintf("%#x", rr.lo)); 141 else 142 s->append(StringPrintf("%#x-%#x", rr.lo, rr.hi)); 143 sep = " "; 144 } 145 break; 146 } 147 } 148 s->append("}"); 149} 150 151string Regexp::Dump() { 152 string s; 153 154 // Make sure being called from a unit test. 155 if (FLAGS_test_tmpdir.empty()) { 156 LOG(ERROR) << "Cannot use except for testing."; 157 return s; 158 } 159 160 DumpRegexpAppending(this, &s); 161 return s; 162} 163 164} // namespace re2 165