12ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Copyright 2006 The RE2 Authors. All Rights Reserved. 22ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Use of this source code is governed by a BSD-style 32ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// license that can be found in the LICENSE file. 42ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 52ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Format a regular expression structure as a string. 62ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Tested by parse_test.cc 72ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 82ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "util/util.h" 92ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/regexp.h" 102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/walker-inl.h" 112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonnamespace re2 { 132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonenum { 152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson PrecAtom, 162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson PrecUnary, 172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson PrecConcat, 182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson PrecAlternate, 192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson PrecEmpty, 202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson PrecParen, 212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson PrecToplevel, 222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}; 232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Helper function. See description below. 252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic void AppendCCRange(string* t, Rune lo, Rune hi); 262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Walker to generate string in s_. 282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// The arg pointers are actually integers giving the 292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// context precedence. 302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// The child_args are always NULL. 312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonclass ToStringWalker : public Regexp::Walker<int> { 322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson public: 332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson explicit ToStringWalker(string* t) : t_(t) {} 342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson virtual int PreVisit(Regexp* re, int parent_arg, bool* stop); 362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg, 372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int* child_args, int nchild_args); 382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson virtual int ShortVisit(Regexp* re, int parent_arg) { 392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 0; 402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson private: 432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson string* t_; // The string the walker appends to. 442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson DISALLOW_EVIL_CONSTRUCTORS(ToStringWalker); 462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}; 472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstring Regexp::ToString() { 492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson string t; 502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ToStringWalker w(&t); 512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson w.WalkExponential(this, PrecToplevel, 100000); 522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (w.stopped_early()) 532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t += " [truncated]"; 542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return t; 552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#define ToString DontCallToString // Avoid accidental recursion. 582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Visits re before children are processed. 602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Appends ( if needed and passes new precedence to children. 612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) { 622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int prec = parent_arg; 632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int nprec = PrecAtom; 642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson switch (re->op()) { 662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpNoMatch: 672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpEmptyMatch: 682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpLiteral: 692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpAnyChar: 702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpAnyByte: 712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpBeginLine: 722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpEndLine: 732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpBeginText: 742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpEndText: 752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpWordBoundary: 762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpNoWordBoundary: 772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpCharClass: 782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpHaveMatch: 792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson nprec = PrecAtom; 802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpConcat: 832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpLiteralString: 842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prec < PrecConcat) 852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("(?:"); 862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson nprec = PrecConcat; 872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpAlternate: 902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prec < PrecAlternate) 912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("(?:"); 922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson nprec = PrecAlternate; 932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpCapture: 962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("("); 972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re->name()) { 982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("?P<"); 992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append(*re->name()); 1002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append(">"); 1012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson nprec = PrecParen; 1032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 1042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpStar: 1062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpPlus: 1072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpQuest: 1082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpRepeat: 1092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prec < PrecUnary) 1102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("(?:"); 1112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // The subprecedence here is PrecAtom instead of PrecUnary 1122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // because PCRE treats two unary ops in a row as a parse error. 1132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson nprec = PrecAtom; 1142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 1152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return nprec; 1182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic void AppendLiteral(string *t, Rune r, bool foldcase) { 1212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) { 1222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t->append(1, '\\'); 1232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t->append(1, r); 1242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else if (foldcase && 'a' <= r && r <= 'z') { 1252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if ('a' <= r && r <= 'z') 1262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson r += 'A' - 'a'; 1272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t->append(1, '['); 1282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t->append(1, r); 1292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t->append(1, r + 'a' - 'A'); 1302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t->append(1, ']'); 1312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 1322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson AppendCCRange(t, r, r); 1332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Visits re after children are processed. 1372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// For childless regexps, all the work is done here. 1382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// For regexps with children, append any unary suffixes or ). 1392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg, 1402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int* child_args, int nchild_args) { 1412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int prec = parent_arg; 1422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson switch (re->op()) { 1432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpNoMatch: 1442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // There's no simple symbol for "no match", but 1452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // [^0-Runemax] excludes everything. 1462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("[^\\x00-\\x{10ffff}]"); 1472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 1482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpEmptyMatch: 1502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Append (?:) to make empty string visible, 1512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // unless this is already being parenthesized. 1522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prec < PrecEmpty) 1532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("(?:)"); 1542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 1552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpLiteral: 1572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson AppendLiteral(t_, re->rune(), re->parse_flags() & Regexp::FoldCase); 1582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 1592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpLiteralString: 1612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (int i = 0; i < re->nrunes(); i++) 1622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson AppendLiteral(t_, re->runes()[i], re->parse_flags() & Regexp::FoldCase); 1632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prec < PrecConcat) 1642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append(")"); 1652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 1662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpConcat: 1682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prec < PrecConcat) 1692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append(")"); 1702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 1712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpAlternate: 1732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Clumsy but workable: the children all appended | 1742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // at the end of their strings, so just remove the last one. 1752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if ((*t_)[t_->size()-1] == '|') 1762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->erase(t_->size()-1); 1772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson else 1782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(DFATAL) << "Bad final char: " << t_; 1792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prec < PrecAlternate) 1802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append(")"); 1812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 1822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpStar: 1842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("*"); 1852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re->parse_flags() & Regexp::NonGreedy) 1862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("?"); 1872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prec < PrecUnary) 1882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append(")"); 1892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 1902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpPlus: 1922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("+"); 1932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re->parse_flags() & Regexp::NonGreedy) 1942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("?"); 1952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prec < PrecUnary) 1962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append(")"); 1972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 1982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpQuest: 2002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("?"); 2012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re->parse_flags() & Regexp::NonGreedy) 2022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("?"); 2032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prec < PrecUnary) 2042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append(")"); 2052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 2062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpRepeat: 2082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re->max() == -1) 2092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append(StringPrintf("{%d,}", re->min())); 2102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson else if (re->min() == re->max()) 2112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append(StringPrintf("{%d}", re->min())); 2122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson else 2132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append(StringPrintf("{%d,%d}", re->min(), re->max())); 2142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re->parse_flags() & Regexp::NonGreedy) 2152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("?"); 2162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prec < PrecUnary) 2172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append(")"); 2182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 2192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpAnyChar: 2212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("."); 2222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 2232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpAnyByte: 2252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("\\C"); 2262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 2272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpBeginLine: 2292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("^"); 2302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 2312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpEndLine: 2332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("$"); 2342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 2352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpBeginText: 2372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("(?-m:^)"); 2382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 2392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpEndText: 2412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re->parse_flags() & Regexp::WasDollar) 2422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("(?-m:$)"); 2432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson else 2442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("\\z"); 2452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 2462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpWordBoundary: 2482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("\\b"); 2492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 2502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpNoWordBoundary: 2522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("\\B"); 2532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 2542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpCharClass: { 2562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re->cc()->size() == 0) { 2572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("[^\\x00-\\x{10ffff}]"); 2582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 2592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 2602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("["); 2612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Heuristic: show class as negated if it contains the 2622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // non-character 0xFFFE. 2632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson CharClass* cc = re->cc(); 2642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (cc->Contains(0xFFFE)) { 2652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson cc = cc->Negate(); 2662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("^"); 2672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 2682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (CharClass::iterator i = cc->begin(); i != cc->end(); ++i) 2692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson AppendCCRange(t_, i->lo, i->hi); 2702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (cc != re->cc()) 2712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson cc->Delete(); 2722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("]"); 2732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 2742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 2752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpCapture: 2772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append(")"); 2782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 2792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpHaveMatch: 2812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // There's no syntax accepted by the parser to generate 2822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // this node (it is generated by RE2::Set) so make something 2832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // up that is readable but won't compile. 2842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("(?HaveMatch:%d)", re->match_id()); 2852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 2862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 2872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // If the parent is an alternation, append the | for it. 2892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (prec == PrecAlternate) 2902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t_->append("|"); 2912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return 0; 2932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 2942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Appends a rune for use in a character class to the string t. 2962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic void AppendCCChar(string* t, Rune r) { 2972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (0x20 <= r && r <= 0x7E) { 2982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (strchr("[]^-\\", r)) 2992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t->append("\\"); 3002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t->append(1, r); 3012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return; 3022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 3032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson switch (r) { 3042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson default: 3052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 3062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case '\r': 3082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t->append("\\r"); 3092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return; 3102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case '\t': 3122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t->append("\\t"); 3132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return; 3142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case '\n': 3162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t->append("\\n"); 3172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return; 3182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case '\f': 3202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t->append("\\f"); 3212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return; 3222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 3232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (r < 0x100) { 3252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson StringAppendF(t, "\\x%02x", static_cast<int>(r)); 3262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return; 3272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 3282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson StringAppendF(t, "\\x{%x}", static_cast<int>(r)); 3292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 3302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic void AppendCCRange(string* t, Rune lo, Rune hi) { 3322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (lo > hi) 3332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return; 3342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson AppendCCChar(t, lo); 3352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (lo < hi) { 3362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson t->append("-"); 3372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson AppendCCChar(t, hi); 3382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 3392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 3402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} // namespace re2 342