// Copyright 2006 The RE2 Authors.  All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
42ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
// Format a regular expression structure as a string.
// Tested by parse_test.cc
72ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
82ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "util/util.h"
92ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/regexp.h"
102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/walker-inl.h"
112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonnamespace re2 {
132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
// Precedence of the context in which a sub-expression is being printed.
// The values are ordered so that they can be compared with <: the walker
// adds (?:...) grouping when the context's value is smaller than the
// value associated with the operator being printed.
enum {
  PrecAtom = 0,
  PrecUnary = 1,
  PrecConcat = 2,
  PrecAlternate = 3,
  PrecEmpty = 4,
  PrecParen = 5,
  PrecToplevel = 6,
};
232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Helper function.  See description below.
252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic void AppendCCRange(string* t, Rune lo, Rune hi);
262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Walker to generate string in s_.
282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// The arg pointers are actually integers giving the
292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// context precedence.
302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// The child_args are always NULL.
312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonclass ToStringWalker : public Regexp::Walker<int> {
322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson public:
332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  explicit ToStringWalker(string* t) : t_(t) {}
342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  virtual int PreVisit(Regexp* re, int parent_arg, bool* stop);
362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg,
372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson                        int* child_args, int nchild_args);
382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  virtual int ShortVisit(Regexp* re, int parent_arg) {
392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return 0;
402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson private:
432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  string* t_;  // The string the walker appends to.
442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  DISALLOW_EVIL_CONSTRUCTORS(ToStringWalker);
462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson};
472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstring Regexp::ToString() {
492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  string t;
502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  ToStringWalker w(&t);
512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  w.WalkExponential(this, PrecToplevel, 100000);
522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (w.stopped_early())
532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    t += " [truncated]";
542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return t;
552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
// Guard against accidental recursion: from this point on, every use of
// the identifier ToString in this file is rewritten to DontCallToString.
#define ToString DontCallToString
582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Visits re before children are processed.
602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Appends ( if needed and passes new precedence to children.
612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) {
622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  int prec = parent_arg;
632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  int nprec = PrecAtom;
642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  switch (re->op()) {
662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpNoMatch:
672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpEmptyMatch:
682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpLiteral:
692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpAnyChar:
702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpAnyByte:
712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpBeginLine:
722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpEndLine:
732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpBeginText:
742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpEndText:
752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpWordBoundary:
762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpNoWordBoundary:
772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpCharClass:
782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpHaveMatch:
792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      nprec = PrecAtom;
802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpConcat:
832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpLiteralString:
842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (prec < PrecConcat)
852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append("(?:");
862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      nprec = PrecConcat;
872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpAlternate:
902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (prec < PrecAlternate)
912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append("(?:");
922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      nprec = PrecAlternate;
932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpCapture:
962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      t_->append("(");
972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (re->name()) {
982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append("?P<");
992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append(*re->name());
1002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append(">");
1012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      }
1022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      nprec = PrecParen;
1032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
1042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpStar:
1062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpPlus:
1072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpQuest:
1082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpRepeat:
1092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (prec < PrecUnary)
1102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append("(?:");
1112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      // The subprecedence here is PrecAtom instead of PrecUnary
1122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      // because PCRE treats two unary ops in a row as a parse error.
1132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      nprec = PrecAtom;
1142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
1152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
1162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return nprec;
1182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
1192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic void AppendLiteral(string *t, Rune r, bool foldcase) {
1212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) {
1222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    t->append(1, '\\');
1232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    t->append(1, r);
1242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  } else if (foldcase && 'a' <= r && r <= 'z') {
1252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    if ('a' <= r && r <= 'z')
1262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      r += 'A' - 'a';
1272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    t->append(1, '[');
1282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    t->append(1, r);
1292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    t->append(1, r + 'a' - 'A');
1302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    t->append(1, ']');
1312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  } else {
1322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    AppendCCRange(t, r, r);
1332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
1342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
1352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Visits re after children are processed.
1372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// For childless regexps, all the work is done here.
1382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// For regexps with children, append any unary suffixes or ).
1392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
1402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson                              int* child_args, int nchild_args) {
1412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  int prec = parent_arg;
1422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  switch (re->op()) {
1432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpNoMatch:
1442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      // There's no simple symbol for "no match", but
1452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      // [^0-Runemax] excludes everything.
1462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      t_->append("[^\\x00-\\x{10ffff}]");
1472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
1482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpEmptyMatch:
1502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      // Append (?:) to make empty string visible,
1512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      // unless this is already being parenthesized.
1522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (prec < PrecEmpty)
1532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append("(?:)");
1542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
1552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpLiteral:
1572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      AppendLiteral(t_, re->rune(), re->parse_flags() & Regexp::FoldCase);
1582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
1592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpLiteralString:
1612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      for (int i = 0; i < re->nrunes(); i++)
1622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        AppendLiteral(t_, re->runes()[i], re->parse_flags() & Regexp::FoldCase);
1632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (prec < PrecConcat)
1642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append(")");
1652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
1662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpConcat:
1682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (prec < PrecConcat)
1692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append(")");
1702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
1712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpAlternate:
1732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      // Clumsy but workable: the children all appended |
1742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      // at the end of their strings, so just remove the last one.
1752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if ((*t_)[t_->size()-1] == '|')
1762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->erase(t_->size()-1);
1772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      else
1782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        LOG(DFATAL) << "Bad final char: " << t_;
1792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (prec < PrecAlternate)
1802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append(")");
1812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
1822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpStar:
1842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      t_->append("*");
1852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (re->parse_flags() & Regexp::NonGreedy)
1862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append("?");
1872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (prec < PrecUnary)
1882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append(")");
1892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
1902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpPlus:
1922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      t_->append("+");
1932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (re->parse_flags() & Regexp::NonGreedy)
1942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append("?");
1952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (prec < PrecUnary)
1962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append(")");
1972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
1982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpQuest:
2002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      t_->append("?");
2012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (re->parse_flags() & Regexp::NonGreedy)
2022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append("?");
2032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (prec < PrecUnary)
2042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append(")");
2052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
2062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpRepeat:
2082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (re->max() == -1)
2092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append(StringPrintf("{%d,}", re->min()));
2102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      else if (re->min() == re->max())
2112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append(StringPrintf("{%d}", re->min()));
2122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      else
2132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append(StringPrintf("{%d,%d}", re->min(), re->max()));
2142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (re->parse_flags() & Regexp::NonGreedy)
2152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append("?");
2162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (prec < PrecUnary)
2172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append(")");
2182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
2192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpAnyChar:
2212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      t_->append(".");
2222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
2232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpAnyByte:
2252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      t_->append("\\C");
2262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
2272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpBeginLine:
2292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      t_->append("^");
2302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
2312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpEndLine:
2332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      t_->append("$");
2342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
2352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpBeginText:
2372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      t_->append("(?-m:^)");
2382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
2392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpEndText:
2412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (re->parse_flags() & Regexp::WasDollar)
2422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append("(?-m:$)");
2432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      else
2442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append("\\z");
2452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
2462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpWordBoundary:
2482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      t_->append("\\b");
2492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
2502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpNoWordBoundary:
2522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      t_->append("\\B");
2532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
2542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpCharClass: {
2562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (re->cc()->size() == 0) {
2572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append("[^\\x00-\\x{10ffff}]");
2582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        break;
2592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      }
2602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      t_->append("[");
2612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      // Heuristic: show class as negated if it contains the
2622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      // non-character 0xFFFE.
2632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      CharClass* cc = re->cc();
2642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (cc->Contains(0xFFFE)) {
2652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        cc = cc->Negate();
2662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        t_->append("^");
2672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      }
2682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      for (CharClass::iterator i = cc->begin(); i != cc->end(); ++i)
2692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        AppendCCRange(t_, i->lo, i->hi);
2702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (cc != re->cc())
2712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        cc->Delete();
2722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      t_->append("]");
2732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
2742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    }
2752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpCapture:
2772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      t_->append(")");
2782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
2792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpHaveMatch:
2812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      // There's no syntax accepted by the parser to generate
2822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      // this node (it is generated by RE2::Set) so make something
2832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      // up that is readable but won't compile.
2842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      t_->append("(?HaveMatch:%d)", re->match_id());
2852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
2862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
2872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // If the parent is an alternation, append the | for it.
2892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (prec == PrecAlternate)
2902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    t_->append("|");
2912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return 0;
2932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
2942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Appends a rune for use in a character class to the string t.
2962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic void AppendCCChar(string* t, Rune r) {
2972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (0x20 <= r && r <= 0x7E) {
2982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    if (strchr("[]^-\\", r))
2992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      t->append("\\");
3002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    t->append(1, r);
3012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return;
3022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
3032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  switch (r) {
3042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    default:
3052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
3062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case '\r':
3082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      t->append("\\r");
3092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      return;
3102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case '\t':
3122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      t->append("\\t");
3132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      return;
3142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case '\n':
3162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      t->append("\\n");
3172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      return;
3182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case '\f':
3202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      t->append("\\f");
3212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      return;
3222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
3232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (r < 0x100) {
3252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    StringAppendF(t, "\\x%02x", static_cast<int>(r));
3262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return;
3272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
3282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  StringAppendF(t, "\\x{%x}", static_cast<int>(r));
3292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
3302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic void AppendCCRange(string* t, Rune lo, Rune hi) {
3322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (lo > hi)
3332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return;
3342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  AppendCCChar(t, lo);
3352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (lo < hi) {
3362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    t->append("-");
3372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    AppendCCChar(t, hi);
3382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
3392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
3402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}  // namespace re2
342