10d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Copyright 2006 The RE2 Authors.  All Rights Reserved.
20d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Use of this source code is governed by a BSD-style
30d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// license that can be found in the LICENSE file.
40d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
50d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Dump the regexp into a string showing structure.
60d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Tested by parse_unittest.cc
70d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
80d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// This function traverses the regexp recursively,
90d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// meaning that on inputs like Regexp::Simplify of
100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// a{100}{100}{100}{100}{100}{100}{100}{100}{100}{100},
110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// it takes time and space exponential in the size of the
120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// original regular expression.  It can also use stack space
130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// linear in the size of the regular expression for inputs
140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// like ((((((((((((((((a*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*.
150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// IT IS NOT SAFE TO CALL FROM PRODUCTION CODE.
160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// As a result, Dump is provided only in the testing
170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// library (see BUILD).
180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include <string>
200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include <vector>
210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "util/test.h"
220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/stringpiece.h"
230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/regexp.h"
240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Cause a link error if this file is used outside of testing.
260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinDECLARE_string(test_tmpdir);
270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinnamespace re2 {
290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstatic const char* kOpcodeNames[] = {
310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "bad",
320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "no",
330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "emp",
340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "lit",
350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "str",
360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "cat",
370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "alt",
380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "star",
390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "plus",
400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "que",
410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "rep",
420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "cap",
430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "dot",
440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "byte",
450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "bol",
460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "eol",
470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "wb",   // kRegexpWordBoundary
480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "nwb",  // kRegexpNoWordBoundary
490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "bot",
500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "eot",
510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "cc",
520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  "match",
530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin};
540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Create string representation of regexp with explicit structure.
560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Nothing pretty, just for testing.
570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstatic void DumpRegexpAppending(Regexp* re, string* s) {
580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (re->op() < 0 || re->op() >= arraysize(kOpcodeNames)) {
590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    StringAppendF(s, "op%d", re->op());
600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  } else {
610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    switch (re->op()) {
620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      default:
630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        break;
640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      case kRegexpStar:
650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      case kRegexpPlus:
660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      case kRegexpQuest:
670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      case kRegexpRepeat:
680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        if (re->parse_flags() & Regexp::NonGreedy)
690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin          s->append("n");
700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        break;
710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    s->append(kOpcodeNames[re->op()]);
730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (re->op() == kRegexpLiteral && (re->parse_flags() & Regexp::FoldCase)) {
740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      Rune r = re->rune();
750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      if ('a' <= r && r <= 'z')
760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        s->append("fold");
770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (re->op() == kRegexpLiteralString && (re->parse_flags() & Regexp::FoldCase)) {
790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      for (int i = 0; i < re->nrunes(); i++) {
800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        Rune r = re->runes()[i];
810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        if ('a' <= r && r <= 'z') {
820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin          s->append("fold");
830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin          break;
840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        }
850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      }
860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  s->append("{");
890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  switch (re->op()) {
900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    default:
910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      break;
920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    case kRegexpEndText:
930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      if (!(re->parse_flags() & Regexp::WasDollar)) {
940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        s->append("\\z");
950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      }
960d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      break;
970d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    case kRegexpLiteral: {
980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      Rune r = re->rune();
990d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      char buf[UTFmax+1];
1000d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      buf[runetochar(buf, &r)] = 0;
1010d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      s->append(buf);
1020d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      break;
1030d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
1040d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    case kRegexpLiteralString:
1050d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      for (int i = 0; i < re->nrunes(); i++) {
1060d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        Rune r = re->runes()[i];
1070d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        char buf[UTFmax+1];
1080d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        buf[runetochar(buf, &r)] = 0;
1090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        s->append(buf);
1100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      }
1110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      break;
1120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    case kRegexpConcat:
1130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    case kRegexpAlternate:
1140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      for (int i = 0; i < re->nsub(); i++)
1150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        DumpRegexpAppending(re->sub()[i], s);
1160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      break;
1170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    case kRegexpStar:
1180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    case kRegexpPlus:
1190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    case kRegexpQuest:
1200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      DumpRegexpAppending(re->sub()[0], s);
1210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      break;
1220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    case kRegexpCapture:
1230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      if (re->name()) {
1240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        s->append(*re->name());
1250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        s->append(":");
1260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      }
1270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      DumpRegexpAppending(re->sub()[0], s);
1280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      break;
1290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    case kRegexpRepeat:
1300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      s->append(StringPrintf("%d,%d ", re->min(), re->max()));
1310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      DumpRegexpAppending(re->sub()[0], s);
1320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      break;
1330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    case kRegexpCharClass: {
1340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      string sep;
1350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      for (CharClass::iterator it = re->cc()->begin();
1360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin           it != re->cc()->end(); ++it) {
1370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        RuneRange rr = *it;
1380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        s->append(sep);
1390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        if (rr.lo == rr.hi)
1400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin          s->append(StringPrintf("%#x", rr.lo));
1410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        else
1420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin          s->append(StringPrintf("%#x-%#x", rr.lo, rr.hi));
1430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        sep = " ";
1440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      }
1450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      break;
1460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
1470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
1480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  s->append("}");
1490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
1500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinstring Regexp::Dump() {
1520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  string s;
1530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Make sure being called from a unit test.
1550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (FLAGS_test_tmpdir.empty()) {
1560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    LOG(ERROR) << "Cannot use except for testing.";
1570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    return s;
1580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
1590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  DumpRegexpAppending(this, &s);
1610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  return s;
1620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
1630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}  // namespace re2
165