prefilter.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
142627b850c8f68a594f105e04b97c512b292b698Ben Gruver// Copyright 2009 The RE2 Authors.  All Rights Reserved.
242627b850c8f68a594f105e04b97c512b292b698Ben Gruver// Use of this source code is governed by a BSD-style
342627b850c8f68a594f105e04b97c512b292b698Ben Gruver// license that can be found in the LICENSE file.
442627b850c8f68a594f105e04b97c512b292b698Ben Gruver
542627b850c8f68a594f105e04b97c512b292b698Ben Gruver#include "util/util.h"
642627b850c8f68a594f105e04b97c512b292b698Ben Gruver#include "re2/prefilter.h"
742627b850c8f68a594f105e04b97c512b292b698Ben Gruver#include "re2/re2.h"
842627b850c8f68a594f105e04b97c512b292b698Ben Gruver#include "re2/unicode_casefold.h"
942627b850c8f68a594f105e04b97c512b292b698Ben Gruver#include "re2/walker-inl.h"
1042627b850c8f68a594f105e04b97c512b292b698Ben Gruver
1142627b850c8f68a594f105e04b97c512b292b698Ben Gruvernamespace re2 {
1242627b850c8f68a594f105e04b97c512b292b698Ben Gruver
1342627b850c8f68a594f105e04b97c512b292b698Ben Gruverstatic const int Trace = false;
1442627b850c8f68a594f105e04b97c512b292b698Ben Gruver
1542627b850c8f68a594f105e04b97c512b292b698Ben Gruvertypedef set<string>::iterator SSIter;
1642627b850c8f68a594f105e04b97c512b292b698Ben Gruvertypedef set<string>::const_iterator ConstSSIter;
1742627b850c8f68a594f105e04b97c512b292b698Ben Gruver
1842627b850c8f68a594f105e04b97c512b292b698Ben Gruverstatic int alloc_id = 100000;  // Used for debugging.
1942627b850c8f68a594f105e04b97c512b292b698Ben Gruver// Initializes a Prefilter, allocating subs_ as necessary.
2042627b850c8f68a594f105e04b97c512b292b698Ben GruverPrefilter::Prefilter(Op op) {
2142627b850c8f68a594f105e04b97c512b292b698Ben Gruver  op_ = op;
2242627b850c8f68a594f105e04b97c512b292b698Ben Gruver  subs_ = NULL;
2342627b850c8f68a594f105e04b97c512b292b698Ben Gruver  if (op_ == AND || op_ == OR)
2442627b850c8f68a594f105e04b97c512b292b698Ben Gruver    subs_ = new vector<Prefilter*>;
2542627b850c8f68a594f105e04b97c512b292b698Ben Gruver
2642627b850c8f68a594f105e04b97c512b292b698Ben Gruver  alloc_id_ = alloc_id++;
2742627b850c8f68a594f105e04b97c512b292b698Ben Gruver  VLOG(10) << "alloc_id: " << alloc_id_;
2842627b850c8f68a594f105e04b97c512b292b698Ben Gruver}
2942627b850c8f68a594f105e04b97c512b292b698Ben Gruver
3042627b850c8f68a594f105e04b97c512b292b698Ben Gruver// Destroys a Prefilter.
3142627b850c8f68a594f105e04b97c512b292b698Ben GruverPrefilter::~Prefilter() {
3242627b850c8f68a594f105e04b97c512b292b698Ben Gruver  VLOG(10) << "Deleted: " << alloc_id_;
3342627b850c8f68a594f105e04b97c512b292b698Ben Gruver  if (subs_) {
3442627b850c8f68a594f105e04b97c512b292b698Ben Gruver    for (int i = 0; i < subs_->size(); i++)
3542627b850c8f68a594f105e04b97c512b292b698Ben Gruver      delete (*subs_)[i];
3642627b850c8f68a594f105e04b97c512b292b698Ben Gruver    delete subs_;
3742627b850c8f68a594f105e04b97c512b292b698Ben Gruver    subs_ = NULL;
3842627b850c8f68a594f105e04b97c512b292b698Ben Gruver  }
3942627b850c8f68a594f105e04b97c512b292b698Ben Gruver}
4042627b850c8f68a594f105e04b97c512b292b698Ben Gruver
4142627b850c8f68a594f105e04b97c512b292b698Ben Gruver// Simplify if the node is an empty Or or And.
4242627b850c8f68a594f105e04b97c512b292b698Ben GruverPrefilter* Prefilter::Simplify() {
4342627b850c8f68a594f105e04b97c512b292b698Ben Gruver  if (op_ != AND && op_ != OR) {
4442627b850c8f68a594f105e04b97c512b292b698Ben Gruver    return this;
4542627b850c8f68a594f105e04b97c512b292b698Ben Gruver  }
4642627b850c8f68a594f105e04b97c512b292b698Ben Gruver
4742627b850c8f68a594f105e04b97c512b292b698Ben Gruver  // Nothing left in the AND/OR.
4842627b850c8f68a594f105e04b97c512b292b698Ben Gruver  if (subs_->size() == 0) {
4942627b850c8f68a594f105e04b97c512b292b698Ben Gruver    if (op_ == AND)
5042627b850c8f68a594f105e04b97c512b292b698Ben Gruver      op_ = ALL;  // AND of nothing is true
5142627b850c8f68a594f105e04b97c512b292b698Ben Gruver    else
5242627b850c8f68a594f105e04b97c512b292b698Ben Gruver      op_ = NONE;  // OR of nothing is false
5342627b850c8f68a594f105e04b97c512b292b698Ben Gruver
5442627b850c8f68a594f105e04b97c512b292b698Ben Gruver    return this;
5542627b850c8f68a594f105e04b97c512b292b698Ben Gruver  }
5642627b850c8f68a594f105e04b97c512b292b698Ben Gruver
5742627b850c8f68a594f105e04b97c512b292b698Ben Gruver  // Just one subnode: throw away wrapper.
5842627b850c8f68a594f105e04b97c512b292b698Ben Gruver  if (subs_->size() == 1) {
59    Prefilter* a = (*subs_)[0];
60    subs_->clear();
61    delete this;
62    return a->Simplify();
63  }
64
65  return this;
66}
67
68// Combines two Prefilters together to create an "op" (AND or OR).
69// The passed Prefilters will be part of the returned Prefilter or deleted.
70// Does lots of work to avoid creating unnecessarily complicated structures.
71Prefilter* Prefilter::AndOr(Op op, Prefilter* a, Prefilter* b) {
72  // If a, b can be rewritten as op, do so.
73  a = a->Simplify();
74  b = b->Simplify();
75
76  // Canonicalize: a->op <= b->op.
77  if (a->op() > b->op()) {
78    Prefilter* t = a;
79    a = b;
80    b = t;
81  }
82
83  // Trivial cases.
84  //    ALL AND b = b
85  //    NONE OR b = b
86  //    ALL OR b   = ALL
87  //    NONE AND b = NONE
88  // Don't need to look at b, because of canonicalization above.
89  // ALL and NONE are smallest opcodes.
90  if (a->op() == ALL || a->op() == NONE) {
91    if ((a->op() == ALL && op == AND) ||
92        (a->op() == NONE && op == OR)) {
93      delete a;
94      return b;
95    } else {
96      delete b;
97      return a;
98    }
99  }
100
101  // If a and b match op, merge their contents.
102  if (a->op() == op && b->op() == op) {
103    for (int i = 0; i < b->subs()->size(); i++) {
104      Prefilter* bb = (*b->subs())[i];
105      a->subs()->push_back(bb);
106    }
107    b->subs()->clear();
108    delete b;
109    return a;
110  }
111
112  // If a already has the same op as the op that is under construction
113  // add in b (similarly if b already has the same op, add in a).
114  if (b->op() == op) {
115    Prefilter* t = a;
116    a = b;
117    b = t;
118  }
119  if (a->op() == op) {
120    a->subs()->push_back(b);
121    return a;
122  }
123
124  // Otherwise just return the op.
125  Prefilter* c = new Prefilter(op);
126  c->subs()->push_back(a);
127  c->subs()->push_back(b);
128  return c;
129}
130
131Prefilter* Prefilter::And(Prefilter* a, Prefilter* b) {
132  return AndOr(AND, a, b);
133}
134
135Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) {
136  return AndOr(OR, a, b);
137}
138
139static void SimplifyStringSet(set<string> *ss) {
140  // Now make sure that the strings aren't redundant.  For example, if
141  // we know "ab" is a required string, then it doesn't help at all to
142  // know that "abc" is also a required string, so delete "abc". This
143  // is because, when we are performing a string search to filter
144  // regexps, matching ab will already allow this regexp to be a
145  // candidate for match, so further matching abc is redundant.
146
147  for (SSIter i = ss->begin(); i != ss->end(); ++i) {
148    SSIter j = i;
149    ++j;
150    while (j != ss->end()) {
151      // Increment j early so that we can erase the element it points to.
152      SSIter old_j = j;
153      ++j;
154      if (old_j->find(*i) != string::npos)
155        ss->erase(old_j);
156    }
157  }
158}
159
160Prefilter* Prefilter::OrStrings(set<string>* ss) {
161  SimplifyStringSet(ss);
162  Prefilter* or_prefilter = NULL;
163  if (!ss->empty()) {
164    or_prefilter = new Prefilter(NONE);
165    for (SSIter i = ss->begin(); i != ss->end(); ++i)
166      or_prefilter = Or(or_prefilter, FromString(*i));
167  }
168  return or_prefilter;
169}
170
171static Rune ToLowerRune(Rune r) {
172  if (r < Runeself) {
173    if ('A' <= r && r <= 'Z')
174      r += 'a' - 'A';
175    return r;
176  }
177
178  CaseFold *f = LookupCaseFold(unicode_tolower, num_unicode_tolower, r);
179  if (f == NULL || r < f->lo)
180    return r;
181  return ApplyFold(f, r);
182}
183
184static Rune ToLowerRuneLatin1(Rune r) {
185  if ('A' <= r && r <= 'Z')
186    r += 'a' - 'A';
187  return r;
188}
189
190Prefilter* Prefilter::FromString(const string& str) {
191  Prefilter* m = new Prefilter(Prefilter::ATOM);
192  m->atom_ = str;
193  return m;
194}
195
196// Information about a regexp used during computation of Prefilter.
197// Can be thought of as information about the set of strings matching
198// the given regular expression.
199class Prefilter::Info {
200 public:
201  Info();
202  ~Info();
203
204  // More constructors.  They delete their Info* arguments.
205  static Info* Alt(Info* a, Info* b);
206  static Info* Concat(Info* a, Info* b);
207  static Info* And(Info* a, Info* b);
208  static Info* Star(Info* a);
209  static Info* Plus(Info* a);
210  static Info* Quest(Info* a);
211  static Info* EmptyString();
212  static Info* NoMatch();
213  static Info* AnyChar();
214  static Info* CClass(CharClass* cc, bool latin1);
215  static Info* Literal(Rune r);
216  static Info* LiteralLatin1(Rune r);
217  static Info* AnyMatch();
218
219  // Format Info as a string.
220  string ToString();
221
222  // Caller takes ownership of the Prefilter.
223  Prefilter* TakeMatch();
224
225  set<string>& exact() { return exact_; }
226
227  bool is_exact() const { return is_exact_; }
228
229  class Walker;
230
231 private:
232  set<string> exact_;
233
234  // When is_exact_ is true, the strings that match
235  // are placed in exact_. When it is no longer an exact
236  // set of strings that match this RE, then is_exact_
237  // is false and the match_ contains the required match
238  // criteria.
239  bool is_exact_;
240
241  // Accumulated Prefilter query that any
242  // match for this regexp is guaranteed to match.
243  Prefilter* match_;
244};
245
246
247Prefilter::Info::Info()
248  : is_exact_(false),
249    match_(NULL) {
250}
251
252Prefilter::Info::~Info() {
253  delete match_;
254}
255
256Prefilter* Prefilter::Info::TakeMatch() {
257  if (is_exact_) {
258    match_ = Prefilter::OrStrings(&exact_);
259    is_exact_ = false;
260  }
261  Prefilter* m = match_;
262  match_ = NULL;
263  return m;
264}
265
266// Format a Info in string form.
267string Prefilter::Info::ToString() {
268  if (this == NULL) {
269    // Sometimes when iterating on children of a node,
270    // some children might have NULL Info. Adding
271    // the check here for NULL to take care of cases where
272    // the caller is not checking.
273    return "";
274  }
275
276  if (is_exact_) {
277    int n = 0;
278    string s;
279    for (set<string>::iterator i = exact_.begin(); i != exact_.end(); ++i) {
280      if (n++ > 0)
281        s += ",";
282      s += *i;
283    }
284    return s;
285  }
286
287  if (match_)
288    return match_->DebugString();
289
290  return "";
291}
292
293// Add the strings from src to dst.
294static void CopyIn(const set<string>& src, set<string>* dst) {
295  for (ConstSSIter i = src.begin(); i != src.end(); ++i)
296    dst->insert(*i);
297}
298
299// Add the cross-product of a and b to dst.
300// (For each string i in a and j in b, add i+j.)
301static void CrossProduct(const set<string>& a,
302                         const set<string>& b,
303                         set<string>* dst) {
304  for (ConstSSIter i = a.begin(); i != a.end(); ++i)
305    for (ConstSSIter j = b.begin(); j != b.end(); ++j)
306      dst->insert(*i + *j);
307}
308
309// Concats a and b. Requires that both are exact sets.
310// Forms an exact set that is a crossproduct of a and b.
311Prefilter::Info* Prefilter::Info::Concat(Info* a, Info* b) {
312  if (a == NULL)
313    return b;
314  DCHECK(a->is_exact_);
315  DCHECK(b && b->is_exact_);
316  Info *ab = new Info();
317
318  CrossProduct(a->exact_, b->exact_, &ab->exact_);
319  ab->is_exact_ = true;
320
321  delete a;
322  delete b;
323  return ab;
324}
325
326// Constructs an inexact Info for ab given a and b.
327// Used only when a or b is not exact or when the
328// exact cross product is likely to be too big.
329Prefilter::Info* Prefilter::Info::And(Info* a, Info* b) {
330  if (a == NULL)
331    return b;
332  if (b == NULL)
333    return a;
334
335  Info *ab = new Info();
336
337  ab->match_ = Prefilter::And(a->TakeMatch(), b->TakeMatch());
338  ab->is_exact_ = false;
339  delete a;
340  delete b;
341  return ab;
342}
343
344// Constructs Info for a|b given a and b.
345Prefilter::Info* Prefilter::Info::Alt(Info* a, Info* b) {
346  Info *ab = new Info();
347
348  if (a->is_exact_ && b->is_exact_) {
349    CopyIn(a->exact_, &ab->exact_);
350    CopyIn(b->exact_, &ab->exact_);
351    ab->is_exact_ = true;
352  } else {
353    // Either a or b has is_exact_ = false. If the other
354    // one has is_exact_ = true, we move it to match_ and
355    // then create a OR of a,b. The resulting Info has
356    // is_exact_ = false.
357    ab->match_ = Prefilter::Or(a->TakeMatch(), b->TakeMatch());
358    ab->is_exact_ = false;
359  }
360
361  delete a;
362  delete b;
363  return ab;
364}
365
366// Constructs Info for a? given a.
367Prefilter::Info* Prefilter::Info::Quest(Info *a) {
368  Info *ab = new Info();
369
370  ab->is_exact_ = false;
371  ab->match_ = new Prefilter(ALL);
372  delete a;
373  return ab;
374}
375
376// Constructs Info for a* given a.
377// Same as a? -- not much to do.
378Prefilter::Info* Prefilter::Info::Star(Info *a) {
379  return Quest(a);
380}
381
382// Constructs Info for a+ given a. If a was exact set, it isn't
383// anymore.
384Prefilter::Info* Prefilter::Info::Plus(Info *a) {
385  Info *ab = new Info();
386
387  ab->match_ = a->TakeMatch();
388  ab->is_exact_ = false;
389
390  delete a;
391  return ab;
392}
393
394static string RuneToString(Rune r) {
395  char buf[UTFmax];
396  int n = runetochar(buf, &r);
397  return string(buf, n);
398}
399
400static string RuneToStringLatin1(Rune r) {
401  char c = r & 0xff;
402  return string(&c, 1);
403}
404
405// Constructs Info for literal rune.
406Prefilter::Info* Prefilter::Info::Literal(Rune r) {
407  Info* info = new Info();
408  info->exact_.insert(RuneToString(ToLowerRune(r)));
409  info->is_exact_ = true;
410  return info;
411}
412
413// Constructs Info for literal rune for Latin1 encoded string.
414Prefilter::Info* Prefilter::Info::LiteralLatin1(Rune r) {
415  Info* info = new Info();
416  info->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r)));
417  info->is_exact_ = true;
418  return info;
419}
420
421// Constructs Info for dot (any character).
422Prefilter::Info* Prefilter::Info::AnyChar() {
423  Prefilter::Info* info = new Prefilter::Info();
424  info->match_ = new Prefilter(ALL);
425  return info;
426}
427
428// Constructs Prefilter::Info for no possible match.
429Prefilter::Info* Prefilter::Info::NoMatch() {
430  Prefilter::Info* info = new Prefilter::Info();
431  info->match_ = new Prefilter(NONE);
432  return info;
433}
434
435// Constructs Prefilter::Info for any possible match.
436// This Prefilter::Info is valid for any regular expression,
437// since it makes no assertions whatsoever about the
438// strings being matched.
439Prefilter::Info* Prefilter::Info::AnyMatch() {
440  Prefilter::Info *info = new Prefilter::Info();
441  info->match_ = new Prefilter(ALL);
442  return info;
443}
444
445// Constructs Prefilter::Info for just the empty string.
446Prefilter::Info* Prefilter::Info::EmptyString() {
447  Prefilter::Info* info = new Prefilter::Info();
448  info->is_exact_ = true;
449  info->exact_.insert("");
450  return info;
451}
452
453// Constructs Prefilter::Info for a character class.
454typedef CharClass::iterator CCIter;
455Prefilter::Info* Prefilter::Info::CClass(CharClass *cc,
456                                         bool latin1) {
457  if (Trace) {
458    VLOG(0) << "CharClassInfo:";
459    for (CCIter i = cc->begin(); i != cc->end(); ++i)
460      VLOG(0) << "  " << i->lo << "-" << i->hi;
461  }
462
463  // If the class is too large, it's okay to overestimate.
464  if (cc->size() > 10)
465    return AnyChar();
466
467  Prefilter::Info *a = new Prefilter::Info();
468  for (CCIter i = cc->begin(); i != cc->end(); ++i)
469    for (Rune r = i->lo; r <= i->hi; r++) {
470      if (latin1) {
471        a->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r)));
472      } else {
473        a->exact_.insert(RuneToString(ToLowerRune(r)));
474      }
475    }
476
477
478  a->is_exact_ = true;
479
480  if (Trace) {
481    VLOG(0) << " = " << a->ToString();
482  }
483
484  return a;
485}
486
487class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> {
488 public:
489  Walker(bool latin1) : latin1_(latin1) {}
490
491  virtual Info* PostVisit(
492      Regexp* re, Info* parent_arg,
493      Info* pre_arg,
494      Info** child_args, int nchild_args);
495
496  virtual Info* ShortVisit(
497      Regexp* re,
498      Info* parent_arg);
499
500  bool latin1() { return latin1_; }
501 private:
502  bool latin1_;
503  DISALLOW_EVIL_CONSTRUCTORS(Walker);
504};
505
506Prefilter::Info* Prefilter::BuildInfo(Regexp* re) {
507  if (Trace) {
508    LOG(INFO) << "BuildPrefilter::Info: " << re->ToString();
509  }
510
511  bool latin1 = re->parse_flags() & Regexp::Latin1;
512  Prefilter::Info::Walker w(latin1);
513  Prefilter::Info* info = w.WalkExponential(re, NULL, 100000);
514
515  if (w.stopped_early()) {
516    delete info;
517    return NULL;
518  }
519
520  return info;
521}
522
523Prefilter::Info* Prefilter::Info::Walker::ShortVisit(
524    Regexp* re, Prefilter::Info* parent_arg) {
525  return AnyMatch();
526}
527
528// Constructs the Prefilter::Info for the given regular expression.
529// Assumes re is simplified.
530Prefilter::Info* Prefilter::Info::Walker::PostVisit(
531    Regexp* re, Prefilter::Info* parent_arg,
532    Prefilter::Info* pre_arg, Prefilter::Info** child_args,
533    int nchild_args) {
534  Prefilter::Info *info;
535  switch (re->op()) {
536    default:
537    case kRegexpRepeat:
538      LOG(DFATAL) << "Bad regexp op " << re->op();
539      info = EmptyString();
540      break;
541
542    case kRegexpNoMatch:
543      info = NoMatch();
544      break;
545
546    // These ops match the empty string:
547    case kRegexpEmptyMatch:      // anywhere
548    case kRegexpBeginLine:       // at beginning of line
549    case kRegexpEndLine:         // at end of line
550    case kRegexpBeginText:       // at beginning of text
551    case kRegexpEndText:         // at end of text
552    case kRegexpWordBoundary:    // at word boundary
553    case kRegexpNoWordBoundary:  // not at word boundary
554      info = EmptyString();
555      break;
556
557    case kRegexpLiteral:
558      if (latin1()) {
559        info = LiteralLatin1(re->rune());
560      }
561      else {
562        info = Literal(re->rune());
563      }
564      break;
565
566    case kRegexpLiteralString:
567      if (re->nrunes() == 0) {
568        info = NoMatch();
569        break;
570      }
571      if (latin1()) {
572        info = LiteralLatin1(re->runes()[0]);
573        for (int i = 1; i < re->nrunes(); i++) {
574          info = Concat(info, LiteralLatin1(re->runes()[i]));
575        }
576      } else {
577        info = Literal(re->runes()[0]);
578        for (int i = 1; i < re->nrunes(); i++) {
579          info = Concat(info, Literal(re->runes()[i]));
580        }
581      }
582      break;
583
584    case kRegexpConcat: {
585      // Accumulate in info.
586      // Exact is concat of recent contiguous exact nodes.
587      info = NULL;
588      Info* exact = NULL;
589      for (int i = 0; i < nchild_args; i++) {
590        Info* ci = child_args[i];  // child info
591        if (!ci->is_exact() ||
592            (exact && ci->exact().size() * exact->exact().size() > 16)) {
593          // Exact run is over.
594          info = And(info, exact);
595          exact = NULL;
596          // Add this child's info.
597          info = And(info, ci);
598        } else {
599          // Append to exact run.
600          exact = Concat(exact, ci);
601        }
602      }
603      info = And(info, exact);
604    }
605      break;
606
607    case kRegexpAlternate:
608      info = child_args[0];
609      for (int i = 1; i < nchild_args; i++)
610        info = Alt(info, child_args[i]);
611      VLOG(10) << "Alt: " << info->ToString();
612      break;
613
614    case kRegexpStar:
615      info = Star(child_args[0]);
616      break;
617
618    case kRegexpQuest:
619      info = Quest(child_args[0]);
620      break;
621
622    case kRegexpPlus:
623      info = Plus(child_args[0]);
624      break;
625
626    case kRegexpAnyChar:
627      // Claim nothing, except that it's not empty.
628      info = AnyChar();
629      break;
630
631    case kRegexpCharClass:
632      info = CClass(re->cc(), latin1());
633      break;
634
635    case kRegexpCapture:
636      // These don't affect the set of matching strings.
637      info = child_args[0];
638      break;
639  }
640
641  if (Trace) {
642    VLOG(0) << "BuildInfo " << re->ToString()
643            << ": " << info->ToString();
644  }
645
646  return info;
647}
648
649
650Prefilter* Prefilter::FromRegexp(Regexp* re) {
651  if (re == NULL)
652    return NULL;
653
654  Regexp* simple = re->Simplify();
655  Prefilter::Info *info = BuildInfo(simple);
656
657  simple->Decref();
658  if (info == NULL)
659    return NULL;
660
661  Prefilter* m = info->TakeMatch();
662
663  delete info;
664  return m;
665}
666
667string Prefilter::DebugString() const {
668  if (this == NULL)
669    return "<nil>";
670
671  switch (op_) {
672    default:
673      LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_;
674      return StringPrintf("op%d", op_);
675    case NONE:
676      return "*no-matches*";
677    case ATOM:
678      return atom_;
679    case ALL:
680      return "";
681    case AND: {
682      string s = "";
683      for (int i = 0; i < subs_->size(); i++) {
684        if (i > 0)
685          s += " ";
686        s += (*subs_)[i]->DebugString();
687      }
688      return s;
689    }
690    case OR: {
691      string s = "(";
692      for (int i = 0; i < subs_->size(); i++) {
693        if (i > 0)
694          s += "|";
695        s += (*subs_)[i]->DebugString();
696      }
697      s += ")";
698      return s;
699    }
700  }
701}
702
703Prefilter* Prefilter::FromRE2(const RE2* re2) {
704  if (re2 == NULL)
705    return NULL;
706
707  Regexp* regexp = re2->Regexp();
708  if (regexp == NULL)
709    return NULL;
710
711  return FromRegexp(regexp);
712}
713
714
715}  // namespace re2
716