15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright 2006 The RE2 Authors.  All Rights Reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// license that can be found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Format a regular expression structure as a string.
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Tested by parse_test.cc
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/util.h"
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/regexp.h"
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/walker-inl.h"
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace re2 {
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Precedence of the context in which a regexp node is printed.
// Smaller values bind more tightly.  A node wraps itself in (?: ) when
// the context it is printed in has a smaller value than the node's own
// precedence.
enum {
  PrecAtom = 0,       // context that accepts only a single atom
  PrecUnary = 1,      // operand plus a postfix operator: * + ? {n,m}
  PrecConcat = 2,     // concatenation
  PrecAlternate = 3,  // alternation with |
  PrecEmpty = 4,      // below this, an empty match is spelled out as (?:)
  PrecParen = 5,      // inside capturing parentheses
  PrecToplevel = 6,   // outermost expression
};
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Appends the rune range lo-hi to *t, escaped for use inside a
// character class.  Defined near the end of this file.
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void AppendCCRange(string* t, Rune lo, Rune hi);
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Walker to generate string in s_.
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The arg pointers are actually integers giving the
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// context precedence.
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The child_args are always NULL.
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class ToStringWalker : public Regexp::Walker<int> {
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  explicit ToStringWalker(string* t) : t_(t) {}
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual int PreVisit(Regexp* re, int parent_arg, bool* stop);
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg,
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        int* child_args, int nchild_args);
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual int ShortVisit(Regexp* re, int parent_arg) {
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return 0;
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string* t_;  // The string the walker appends to.
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_EVIL_CONSTRUCTORS(ToStringWalker);
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)string Regexp::ToString() {
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string t;
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ToStringWalker w(&t);
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  w.WalkExponential(this, PrecToplevel, 100000);
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (w.stopped_early())
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    t += " [truncated]";
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return t;
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define ToString DontCallToString  // Avoid accidental recursion.
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Visits re before children are processed.
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Appends ( if needed and passes new precedence to children.
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) {
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int prec = parent_arg;
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int nprec = PrecAtom;
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  switch (re->op()) {
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpNoMatch:
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpEmptyMatch:
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpLiteral:
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpAnyChar:
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpAnyByte:
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpBeginLine:
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpEndLine:
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpBeginText:
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpEndText:
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpWordBoundary:
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpNoWordBoundary:
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpCharClass:
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpHaveMatch:
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      nprec = PrecAtom;
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpConcat:
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpLiteralString:
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (prec < PrecConcat)
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append("(?:");
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      nprec = PrecConcat;
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpAlternate:
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (prec < PrecAlternate)
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append("(?:");
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      nprec = PrecAlternate;
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpCapture:
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t_->append("(");
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (re->name()) {
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append("?P<");
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append(*re->name());
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append(">");
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      nprec = PrecParen;
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpStar:
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpPlus:
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpQuest:
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpRepeat:
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (prec < PrecUnary)
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append("(?:");
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // The subprecedence here is PrecAtom instead of PrecUnary
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // because PCRE treats two unary ops in a row as a parse error.
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      nprec = PrecAtom;
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return nprec;
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void AppendLiteral(string *t, Rune r, bool foldcase) {
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) {
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    t->append(1, '\\');
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    t->append(1, r);
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else if (foldcase && 'a' <= r && r <= 'z') {
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ('a' <= r && r <= 'z')
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      r += 'A' - 'a';
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    t->append(1, '[');
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    t->append(1, r);
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    t->append(1, r + 'a' - 'A');
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    t->append(1, ']');
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AppendCCRange(t, r, r);
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Visits re after children are processed.
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// For childless regexps, all the work is done here.
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// For regexps with children, append any unary suffixes or ).
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                              int* child_args, int nchild_args) {
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int prec = parent_arg;
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  switch (re->op()) {
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpNoMatch:
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // There's no simple symbol for "no match", but
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // [^0-Runemax] excludes everything.
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t_->append("[^\\x00-\\x{10ffff}]");
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpEmptyMatch:
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Append (?:) to make empty string visible,
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // unless this is already being parenthesized.
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (prec < PrecEmpty)
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append("(?:)");
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpLiteral:
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      AppendLiteral(t_, re->rune(), re->parse_flags() & Regexp::FoldCase);
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpLiteralString:
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      for (int i = 0; i < re->nrunes(); i++)
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        AppendLiteral(t_, re->runes()[i], re->parse_flags() & Regexp::FoldCase);
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (prec < PrecConcat)
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append(")");
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpConcat:
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (prec < PrecConcat)
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append(")");
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpAlternate:
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Clumsy but workable: the children all appended |
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // at the end of their strings, so just remove the last one.
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if ((*t_)[t_->size()-1] == '|')
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->erase(t_->size()-1);
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      else
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        LOG(DFATAL) << "Bad final char: " << t_;
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (prec < PrecAlternate)
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append(")");
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpStar:
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t_->append("*");
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (re->parse_flags() & Regexp::NonGreedy)
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append("?");
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (prec < PrecUnary)
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append(")");
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpPlus:
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t_->append("+");
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (re->parse_flags() & Regexp::NonGreedy)
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append("?");
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (prec < PrecUnary)
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append(")");
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpQuest:
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t_->append("?");
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (re->parse_flags() & Regexp::NonGreedy)
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append("?");
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (prec < PrecUnary)
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append(")");
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpRepeat:
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (re->max() == -1)
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append(StringPrintf("{%d,}", re->min()));
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      else if (re->min() == re->max())
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append(StringPrintf("{%d}", re->min()));
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      else
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append(StringPrintf("{%d,%d}", re->min(), re->max()));
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (re->parse_flags() & Regexp::NonGreedy)
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append("?");
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (prec < PrecUnary)
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append(")");
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpAnyChar:
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t_->append(".");
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpAnyByte:
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t_->append("\\C");
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpBeginLine:
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t_->append("^");
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpEndLine:
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t_->append("$");
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpBeginText:
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t_->append("(?-m:^)");
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpEndText:
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (re->parse_flags() & Regexp::WasDollar)
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append("(?-m:$)");
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      else
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append("\\z");
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpWordBoundary:
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t_->append("\\b");
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpNoWordBoundary:
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t_->append("\\B");
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpCharClass: {
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (re->cc()->size() == 0) {
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append("[^\\x00-\\x{10ffff}]");
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        break;
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t_->append("[");
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Heuristic: show class as negated if it contains the
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // non-character 0xFFFE.
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      CharClass* cc = re->cc();
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (cc->Contains(0xFFFE)) {
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        cc = cc->Negate();
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        t_->append("^");
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      for (CharClass::iterator i = cc->begin(); i != cc->end(); ++i)
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        AppendCCRange(t_, i->lo, i->hi);
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (cc != re->cc())
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        cc->Delete();
2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t_->append("]");
2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpCapture:
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t_->append(")");
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kRegexpHaveMatch:
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // There's no syntax accepted by the parser to generate
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // this node (it is generated by RE2::Set) so make something
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // up that is readable but won't compile.
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t_->append("(?HaveMatch:%d)", re->match_id());
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If the parent is an alternation, append the | for it.
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (prec == PrecAlternate)
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    t_->append("|");
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return 0;
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Appends a rune for use in a character class to the string t.
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void AppendCCChar(string* t, Rune r) {
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (0x20 <= r && r <= 0x7E) {
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (strchr("[]^-\\", r))
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t->append("\\");
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    t->append(1, r);
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  switch (r) {
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    default:
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case '\r':
3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t->append("\\r");
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return;
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case '\t':
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t->append("\\t");
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return;
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case '\n':
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t->append("\\n");
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return;
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case '\f':
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      t->append("\\f");
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return;
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (r < 0x100) {
3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    StringAppendF(t, "\\x%02x", static_cast<int>(r));
3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringAppendF(t, "\\x{%x}", static_cast<int>(r));
3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void AppendCCRange(string* t, Rune lo, Rune hi) {
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (lo > hi)
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  AppendCCChar(t, lo);
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (lo < hi) {
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    t->append("-");
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AppendCCChar(t, hi);
3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace re2
342