dump.cc revision 0d4c52358a1af421705c54bd8a9fdd8a30558a2e
1// Copyright 2006 The RE2 Authors.  All Rights Reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Dump the regexp into a string showing structure.
6// Tested by parse_unittest.cc
7
8// This function traverses the regexp recursively,
9// meaning that on inputs like Regexp::Simplify of
10// a{100}{100}{100}{100}{100}{100}{100}{100}{100}{100},
11// it takes time and space exponential in the size of the
12// original regular expression.  It can also use stack space
13// linear in the size of the regular expression for inputs
14// like ((((((((((((((((a*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*.
15// IT IS NOT SAFE TO CALL FROM PRODUCTION CODE.
16// As a result, Dump is provided only in the testing
17// library (see BUILD).
18
19#include <string>
20#include <vector>
21#include "util/test.h"
22#include "re2/stringpiece.h"
23#include "re2/regexp.h"
24
25// Cause a link error if this file is used outside of testing.
26DECLARE_string(test_tmpdir);
27
28namespace re2 {
29
// Short names used by the dumper for each regexp operator.
// The table is indexed directly by the numeric value of Regexp::op()
// (see DumpRegexpAppending), so the position of every entry matters;
// presumably the order mirrors the operator enumeration declared in
// re2/regexp.h -- keep the two in sync when adding operators.
// Both the strings and the pointers are const: the table is read-only.
static const char* const kOpcodeNames[] = {
  "bad",
  "no",
  "emp",
  "lit",
  "str",
  "cat",
  "alt",
  "star",
  "plus",
  "que",
  "rep",
  "cap",
  "dot",
  "byte",
  "bol",
  "eol",
  "wb",   // kRegexpWordBoundary
  "nwb",  // kRegexpNoWordBoundary
  "bot",
  "eot",
  "cc",
  "match",
};
54
55// Create string representation of regexp with explicit structure.
56// Nothing pretty, just for testing.
57static void DumpRegexpAppending(Regexp* re, string* s) {
58  if (re->op() < 0 || re->op() >= arraysize(kOpcodeNames)) {
59    StringAppendF(s, "op%d", re->op());
60  } else {
61    switch (re->op()) {
62      default:
63        break;
64      case kRegexpStar:
65      case kRegexpPlus:
66      case kRegexpQuest:
67      case kRegexpRepeat:
68        if (re->parse_flags() & Regexp::NonGreedy)
69          s->append("n");
70        break;
71    }
72    s->append(kOpcodeNames[re->op()]);
73    if (re->op() == kRegexpLiteral && (re->parse_flags() & Regexp::FoldCase)) {
74      Rune r = re->rune();
75      if ('a' <= r && r <= 'z')
76        s->append("fold");
77    }
78    if (re->op() == kRegexpLiteralString && (re->parse_flags() & Regexp::FoldCase)) {
79      for (int i = 0; i < re->nrunes(); i++) {
80        Rune r = re->runes()[i];
81        if ('a' <= r && r <= 'z') {
82          s->append("fold");
83          break;
84        }
85      }
86    }
87  }
88  s->append("{");
89  switch (re->op()) {
90    default:
91      break;
92    case kRegexpEndText:
93      if (!(re->parse_flags() & Regexp::WasDollar)) {
94        s->append("\\z");
95      }
96      break;
97    case kRegexpLiteral: {
98      Rune r = re->rune();
99      char buf[UTFmax+1];
100      buf[runetochar(buf, &r)] = 0;
101      s->append(buf);
102      break;
103    }
104    case kRegexpLiteralString:
105      for (int i = 0; i < re->nrunes(); i++) {
106        Rune r = re->runes()[i];
107        char buf[UTFmax+1];
108        buf[runetochar(buf, &r)] = 0;
109        s->append(buf);
110      }
111      break;
112    case kRegexpConcat:
113    case kRegexpAlternate:
114      for (int i = 0; i < re->nsub(); i++)
115        DumpRegexpAppending(re->sub()[i], s);
116      break;
117    case kRegexpStar:
118    case kRegexpPlus:
119    case kRegexpQuest:
120      DumpRegexpAppending(re->sub()[0], s);
121      break;
122    case kRegexpCapture:
123      if (re->name()) {
124        s->append(*re->name());
125        s->append(":");
126      }
127      DumpRegexpAppending(re->sub()[0], s);
128      break;
129    case kRegexpRepeat:
130      s->append(StringPrintf("%d,%d ", re->min(), re->max()));
131      DumpRegexpAppending(re->sub()[0], s);
132      break;
133    case kRegexpCharClass: {
134      string sep;
135      for (CharClass::iterator it = re->cc()->begin();
136           it != re->cc()->end(); ++it) {
137        RuneRange rr = *it;
138        s->append(sep);
139        if (rr.lo == rr.hi)
140          s->append(StringPrintf("%#x", rr.lo));
141        else
142          s->append(StringPrintf("%#x-%#x", rr.lo, rr.hi));
143        sep = " ";
144      }
145      break;
146    }
147  }
148  s->append("}");
149}
150
151string Regexp::Dump() {
152  string s;
153
154  // Make sure being called from a unit test.
155  if (FLAGS_test_tmpdir.empty()) {
156    LOG(ERROR) << "Cannot use except for testing.";
157    return s;
158  }
159
160  DumpRegexpAppending(this, &s);
161  return s;
162}
163
164}  // namespace re2
165